//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"),
    cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

// TODO: Remove this option once soft fp128 is fully supported.
static cl::opt<bool>
    EnableSoftFP128("enable-soft-fp128",
                    cl::desc("temp option to enable soft fp128"), cl::Hidden);
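
// Note: since these are hidden cl::opt flags, they do not show up in --help
// but can still be handed to tools that link this backend, e.g.
// `llc -disable-ppc-preinc foo.ll` (illustrative invocation).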

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // Custom lower inline assembly to check for special registers.
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }
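
  // For example, when a function needs both q = n / d and r = n % d,
  // DivRemPairs rewrites the remainder at the IR level as r = n - (n / d) * d,
  // so only the divide reaches instruction selection.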

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE specific operation.
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
  }

  if (Subtarget.hasFSQRT()) {
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
  }

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
  // to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }
439 
440  // PowerPC does not have ROTR
443 
444  if (!Subtarget.useCRBits()) {
445  // PowerPC does not have Select
450  }
451 
452  // PowerPC wants to turn select_cc of FP into fsel when possible.
455 
456  // PowerPC wants to optimize integer setcc a bit
457  if (!Subtarget.useCRBits())
459 

  if (Subtarget.hasFPU()) {

  }

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
  }
  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
    }
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but as a light-weight setjmp/longjmp replacement
  // to support continuations, user-level threading, and so on. As a result,
  // no other SjLj exception interfaces are implemented; please don't build
  // your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
    } else {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
  }
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
  setOperationAction(ISD::FSHR, MVT::i32, Custom);

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      }
      else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      }
      else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    if (Subtarget.isISA3_1()) {

    }

    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {

      if (Subtarget.hasP8Vector()) {

      }
      if (Subtarget.hasDirectMove() && isPPC64) {

      }

      // The nearbyint variants are not allowed to raise the inexact exception,
      // so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
        setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      }

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      }
      else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      else
        setOperationAction(ISD::MUL, MVT::v2i64, Expand);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      // Handle constrained floating-point operations of vector.
      // The predicate is `hasVSX` because Altivec instructions raise no
      // exceptions, while VSX vector instructions do.

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      // No extending loads to f128 on PPC.
      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // No implementation for these ops for PowerPC.

      // Handle constrained floating-point operations of fp128
    } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Set FADD/FSUB as libcall to keep the legalizer from expanding the
      // fp_to_uint and int_to_fp.
      setOperationAction(ISD::FADD, MVT::f128, LibCall);
      setOperationAction(ISD::FSUB, MVT::f128, LibCall);

      // Expand the fp_extend if the target type is fp128.
      setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
      }
    }

    if (Subtarget.hasP9Altivec()) {

    }
  }

  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
  }
  if (Subtarget.hasMMA()) {
    addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  if (Subtarget.isISA3_1())
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (!isPPC64) {

  }

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (!isPPC64)
    setMaxAtomicSizeInBitsSupported(32);

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::FMA);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  setLibcallName(RTLIB::LOG_F128, "logf128");
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
  setLibcallName(RTLIB::EXP_F128, "expf128");
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
  setLibcallName(RTLIB::SIN_F128, "sinf128");
  setLibcallName(RTLIB::COS_F128, "cosf128");
  setLibcallName(RTLIB::POW_F128, "powf128");
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
  setLibcallName(RTLIB::REM_F128, "fmodf128");
  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
  setLibcallName(RTLIB::ROUND_F128, "roundf128");
  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
  setLibcallName(RTLIB::RINT_F128, "rintf128");
  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
  setLibcallName(RTLIB::FMA_F128, "fmaf128");

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(Align(4));

  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR_FUTURE:
    setPrefLoopAlignment(Align(16));
    setPrefFunctionAlignment(Align(16));
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is aligned on an 8-byte boundary on PPC64 and a 4-byte boundary on
  // PPC32.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment.value();
}
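
// For instance, a struct carrying an Altivec vector member, such as the
// hypothetical `struct S { int i; __vector int v; };`, has its byval
// alignment raised to 16 by getMaxByValAlign, while a purely scalar
// aggregate keeps the default of 8 bytes (PPC64) or 4 bytes (PPC32).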

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
  case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR:
    return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR:
    return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::FTSQRT:
    return "PPCISD::FTSQRT";
  case PPCISD::FSQRT:
    return "PPCISD::FSQRT";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::XXSPLTI_SP_TO_DP:
    return "PPCISD::XXSPLTI_SP_TO_DP";
  case PPCISD::XXSPLTI32DX:
    return "PPCISD::XXSPLTI32DX";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
  case PPCISD::ANDI_rec_1_EQ_BIT:
    return "PPCISD::ANDI_rec_1_EQ_BIT";
  case PPCISD::ANDI_rec_1_GT_BIT:
    return "PPCISD::ANDI_rec_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT:
    return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::PADDI_DTPREL:
    return "PPCISD::PADDI_DTPREL";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD: return "PPCISD::VABSD";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
  case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
  case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
  case PPCISD::STRICT_FADDRTZ:
    return "PPCISD::STRICT_FADDRTZ";
  case PPCISD::STRICT_FCTIDZ:
    return "PPCISD::STRICT_FCTIDZ";
  case PPCISD::STRICT_FCTIWZ:
    return "PPCISD::STRICT_FCTIWZ";
  case PPCISD::STRICT_FCTIDUZ:
    return "PPCISD::STRICT_FCTIDUZ";
  case PPCISD::STRICT_FCTIWUZ:
    return "PPCISD::STRICT_FCTIWUZ";
  case PPCISD::STRICT_FCFID:
    return "PPCISD::STRICT_FCFID";
  case PPCISD::STRICT_FCFIDU:
    return "PPCISD::STRICT_FCFIDU";
  case PPCISD::STRICT_FCFIDS:
    return "PPCISD::STRICT_FCFIDS";
  case PPCISD::STRICT_FCFIDUS:
    return "PPCISD::STRICT_FCFIDUS";
  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                   EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
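
// For example, on a big-endian target with two different inputs
// (ShuffleKind 0) the expected mask is <1,3,5,...,31>: vpkuhum keeps the
// least-significant byte of each halfword, which in BE numbering is every
// odd-indexed byte of the concatenated inputs.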

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
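
// For example, with UnitSize 1 on a big-endian target (ShuffleKind 0), this
// accepts the vmrglb pattern <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>,
// which interleaves the low eight bytes of the two inputs.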

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 8-bit
 * elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 * input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask matches a vmrgew or vmrgow merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  return false;
}
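
// For example, a big-endian even-word merge (CheckEven, ShuffleKind 0)
// accepts <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>: words 0 and 2 of
// each input interleaved, exactly what vmrgew produces.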

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
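
// For example, on a big-endian target with two different inputs
// (ShuffleKind 0), the mask <3,4,5,...,18> is a vsldoi by three bytes, so 3
// is returned; on little-endian targets the result is adjusted to
// 16 - ShiftAmt.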

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
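
// For example, with EltSize == 4 the mask
// <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> is accepted: every word of the result
// is a copy of word 1 of the first input, which the splat instructions can
// produce directly.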

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
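
// For example, isNByteElemShuffleMask(N, 4, -1) accepts
// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12> -- the bytes of each word in
// decreasing order -- which is the byte-reversal pattern that
// isXXBRWShuffleMask below looks for.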

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
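
// For example, on little-endian targets the mask
// <0,1,2,3, 20,21,22,23, 8,9,10,11, 12,13,14,15> has word elements
// (0, 5, 2, 3) and matches the "0, H, 2, 3" case above: the function returns
// true with Swap == false, ShiftElts == 1 and InsertAtByte == 8.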

bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
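
// For example, on a big-endian target the mask <4,5,...,19> has leading word
// element M0 == 1, so the inputs are not swapped and ShiftElts == 1 (an
// xxsldwi by one word).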

static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}

/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
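
// For example, on a big-endian target the byte mask <0..7, 24..31> gives
// doubleword elements M0 == 0 and M1 == 3, so Swap == false and
// DM == (0 << 1) + (3 & 1) == 1: an xxpermdi taking doubleword 0 of the
// first input and doubleword 1 of the second.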


/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
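
// For example, with EltSize == 4 on a little-endian target, a mask splatting
// LE word element 1 (mask element 0 == 4) yields (16/4) - 1 - 1 == 2, the
// left-to-right index the vspltw mnemonic expects.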
2303 
2304 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2305 /// by using a vspltis[bhw] instruction of the specified element size, return
2306 /// the constant being splatted. The ByteSize field indicates the number of
2307 /// bytes of each element [124] -> [bhw].
2308 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2309  SDValue OpVal(nullptr, 0);
2310 
2311  // If ByteSize of the splat is bigger than the element size of the
2312  // build_vector, then we have a case where we are checking for a splat where
2313  // multiple elements of the buildvector are folded together into a single
2314  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2315  unsigned EltSize = 16/N->getNumOperands();
2316  if (EltSize < ByteSize) {
2317  unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2318  SDValue UniquedVals[4];
2319  assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2320 
2321  // See if the corresponding elements in each chunk of the buildvector agree.
2322  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2323  if (N->getOperand(i).isUndef()) continue;
2324  // If the element isn't a constant, bail fully out.
2325  if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2326 
2327  if (!UniquedVals[i&(Multiple-1)].getNode())
2328  UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2329  else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2330  return SDValue(); // no match.
2331  }
2332 
2333  // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2334  // either constant or undef values that are identical for each chunk. See
2335  // if these chunks can form into a larger vspltis*.
2336 
2337  // Check to see if all of the leading entries are either 0 or -1. If
2338  // neither, then this won't fit into the immediate field.
2339  bool LeadingZero = true;
2340  bool LeadingOnes = true;
2341  for (unsigned i = 0; i != Multiple-1; ++i) {
2342  if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2343 
2344  LeadingZero &= isNullConstant(UniquedVals[i]);
2345  LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2346  }
2347  // Finally, check the least significant entry.
2348  if (LeadingZero) {
2349  if (!UniquedVals[Multiple-1].getNode())
2350  return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2351  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2352  if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2353  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2354  }
2355  if (LeadingOnes) {
2356  if (!UniquedVals[Multiple-1].getNode())
2357  return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2358  int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2359  if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2360  return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2361  }
2362 
2363  return SDValue();
2364  }
2365 
2366  // Check to see if this buildvec has a single non-undef value in its elements.
2367  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2368  if (N->getOperand(i).isUndef()) continue;
2369  if (!OpVal.getNode())
2370  OpVal = N->getOperand(i);
2371  else if (OpVal != N->getOperand(i))
2372  return SDValue();
2373  }
2374 
2375  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2376 
2377  unsigned ValSizeInBytes = EltSize;
2378  uint64_t Value = 0;
2379  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2380  Value = CN->getZExtValue();
2381  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2382  assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2383  Value = FloatToBits(CN->getValueAPF().convertToFloat());
2384  }
2385 
2386  // If the splat value is larger than the element value, then we can never do
2387  // this splat. The only case where the replicated bits could fit into our
2388  // immediate field is zero, and we prefer to use vxor for that.
2389  if (ValSizeInBytes < ByteSize) return SDValue();
2390 
2391  // If the element value is larger than the splat value, check if it consists
2392  // of a repeated bit pattern of size ByteSize.
2393  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2394  return SDValue();
2395 
2396  // Properly sign extend the value.
2397  int MaskVal = SignExtend32(Value, ByteSize * 8);
2398 
2399  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2400  if (MaskVal == 0) return SDValue();
2401 
2402  // Finally, if this value fits in a 5 bit sext field, return it
2403  if (SignExtend32<5>(MaskVal) == MaskVal)
2404  return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2405  return SDValue();
2406 }
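// Worked examples (illustrative): a v4i32 build_vector of <5,5,5,5> with
// ByteSize == 4 yields the constant 5 and can be selected as vspltisw 5.
// A splat of <16,16,16,16> fails the final 5-bit check (vspltis* only
// encodes -16..15), while <0x00050005,...> with ByteSize == 2 succeeds as
// vspltish 5 because the 32-bit value is a repeated 16-bit pattern.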
2407 
2408 //===----------------------------------------------------------------------===//
2409 // Addressing Mode Selection
2410 //===----------------------------------------------------------------------===//
2411 
2412 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2413 /// or 64-bit immediate, and if the value can be accurately represented as a
2414 /// sign extension from a 16-bit value. If so, this returns true and sets
2415 /// the immediate.
2416 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2417  if (!isa<ConstantSDNode>(N))
2418  return false;
2419 
2420  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2421  if (N->getValueType(0) == MVT::i32)
2422  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2423  else
2424  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2425 }
2426 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2427  return isIntS16Immediate(Op.getNode(), Imm);
2428 }
2429 
2430 
2431 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2432 /// be represented as an indexed [r+r] operation.
2433 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2434  SDValue &Index,
2435  SelectionDAG &DAG) const {
2436  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2437  UI != E; ++UI) {
2438  if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2439  if (Memop->getMemoryVT() == MVT::f64) {
2440  Base = N.getOperand(0);
2441  Index = N.getOperand(1);
2442  return true;
2443  }
2444  }
2445  }
2446  return false;
2447 }
2448 
2449 /// isIntS34Immediate - This method tests whether the value of the given node
2450 /// can be accurately represented as a sign extension from a 34-bit value. If
2451 /// so, this returns true and sets the immediate.
2452 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2453  if (!isa<ConstantSDNode>(N))
2454  return false;
2455 
2456  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2457  return isInt<34>(Imm);
2458 }
2459 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2460  return isIntS34Immediate(Op.getNode(), Imm);
2461 }
2462 
2463 /// SelectAddressRegReg - Given the specified address, check to see if it
2464 /// can be represented as an indexed [r+r] operation. Returns false if it
2465 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2466 /// non-zero and N can be represented by a base register plus a signed 16-bit
2467 /// displacement, make a more precise judgement by checking (displacement % \p
2468 /// EncodingAlignment).
2469 bool PPCTargetLowering::SelectAddressRegReg(
2470  SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2471  MaybeAlign EncodingAlignment) const {
2472  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2473  // a [pc+imm].
2474  if (SelectAddressPCRel(N, Base))
2475  return false;
2476 
2477  int16_t Imm = 0;
2478  if (N.getOpcode() == ISD::ADD) {
2479  // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2480  // SPE load/store can only handle 8-bit offsets.
2481  if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2482  return true;
2483  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2484  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2485  return false; // r+i
2486  if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2487  return false; // r+i
2488 
2489  Base = N.getOperand(0);
2490  Index = N.getOperand(1);
2491  return true;
2492  } else if (N.getOpcode() == ISD::OR) {
2493  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2494  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2495  return false; // Prefer [r+i] when the immediate can be folded.
2496 
2497  // If this is an or of disjoint bitfields, we can codegen this as an add
2498  // (for better address arithmetic) if the LHS and RHS of the OR are provably
2499  // disjoint.
2500  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2501 
2502  if (LHSKnown.Zero.getBoolValue()) {
2503  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2504  // If all of the bits are known zero on the LHS or RHS, the add won't
2505  // carry.
2506  if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2507  Base = N.getOperand(0);
2508  Index = N.getOperand(1);
2509  return true;
2510  }
2511  }
2512  }
2513 
2514  return false;
2515 }
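// Worked example (illustrative): DS-form memory ops such as ld/std require
// the displacement to be a multiple of 4. With EncodingAlignment == 4, an
// address (add r3, 6) cannot use [r+imm], so this returns true and selects
// the [r+r] form; (add r3, 8) returns false in favour of "ld rD, 8(r3)".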
2516 
2517 // If we happen to be doing an i64 load or store into a stack slot that has
2518 // less than a 4-byte alignment, then the frame-index elimination may need to
2519 // use an indexed load or store instruction (because the offset may not be a
2520 // multiple of 4). The extra register needed to hold the offset comes from the
2521 // register scavenger, and it is possible that the scavenger will need to use
2522 // an emergency spill slot. As a result, we need to make sure that a spill slot
2523 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2524 // stack slot.
2525 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2526  // FIXME: This does not handle the LWA case.
2527  if (VT != MVT::i64)
2528  return;
2529 
2530  // NOTE: We'll exclude negative FIs here, which come from argument
2531  // lowering, because there are no known test cases triggering this problem
2532  // using packed structures (or similar). We can remove this exclusion if
2533  // we find such a test case. The reason why this is so test-case driven is
2534  // because this entire 'fixup' is only to prevent crashes (from the
2535  // register scavenger) on not-really-valid inputs. For example, if we have:
2536  // %a = alloca i1
2537  // %b = bitcast i1* %a to i64*
2538  // store i64 0, i64* %b
2539  // then the store should really be marked as 'align 1', but is not. If it
2540  // were marked as 'align 1' then the indexed form would have been
2541  // instruction-selected initially, and the problem this 'fixup' is preventing
2542  // won't happen regardless.
2543  if (FrameIdx < 0)
2544  return;
2545 
2546  MachineFunction &MF = DAG.getMachineFunction();
2547  MachineFrameInfo &MFI = MF.getFrameInfo();
2548 
2549  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2550  return;
2551 
2552  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2553  FuncInfo->setHasNonRISpills();
2554 }
2555 
2556 /// Returns true if the address N can be represented by a base register plus
2557 /// a signed 16-bit displacement [r+imm], and if it is not better
2558 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2559 /// displacements that are multiples of that value.
2560 bool PPCTargetLowering::SelectAddressRegImm(
2561  SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2562  MaybeAlign EncodingAlignment) const {
2563  // FIXME dl should come from parent load or store, not from address
2564  SDLoc dl(N);
2565 
2566  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2567  // a [pc+imm].
2568  if (SelectAddressPCRel(N, Base))
2569  return false;
2570 
2571  // If this can be more profitably realized as r+r, fail.
2572  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2573  return false;
2574 
2575  if (N.getOpcode() == ISD::ADD) {
2576  int16_t imm = 0;
2577  if (isIntS16Immediate(N.getOperand(1), imm) &&
2578  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2579  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2580  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2581  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2582  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2583  } else {
2584  Base = N.getOperand(0);
2585  }
2586  return true; // [r+i]
2587  } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2588  // Match LOAD (ADD (X, Lo(G))).
2589  assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2590  && "Cannot handle constant offsets yet!");
2591  Disp = N.getOperand(1).getOperand(0); // The global address.
2592  assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2593  Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2594  Disp.getOpcode() == ISD::TargetConstantPool ||
2595  Disp.getOpcode() == ISD::TargetJumpTable);
2596  Base = N.getOperand(0);
2597  return true; // [&g+r]
2598  }
2599  } else if (N.getOpcode() == ISD::OR) {
2600  int16_t imm = 0;
2601  if (isIntS16Immediate(N.getOperand(1), imm) &&
2602  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2603  // If this is an or of disjoint bitfields, we can codegen this as an add
2604  // (for better address arithmetic) if the LHS and RHS of the OR are
2605  // provably disjoint.
2606  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2607 
2608  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2609  // If all of the bits are known zero on the LHS or RHS, the add won't
2610  // carry.
2611  if (FrameIndexSDNode *FI =
2612  dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2613  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2614  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2615  } else {
2616  Base = N.getOperand(0);
2617  }
2618  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2619  return true;
2620  }
2621  }
2622  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2623  // Loading from a constant address.
2624 
2625  // If this address fits entirely in a 16-bit sext immediate field, codegen
2626  // this as "d, 0"
2627  int16_t Imm;
2628  if (isIntS16Immediate(CN, Imm) &&
2629  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2630  Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2631  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2632  CN->getValueType(0));
2633  return true;
2634  }
2635 
2636  // Handle 32-bit sext immediates with LIS + addr mode.
2637  if ((CN->getValueType(0) == MVT::i32 ||
2638  (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2639  (!EncodingAlignment ||
2640  isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2641  int Addr = (int)CN->getZExtValue();
2642 
2643  // Otherwise, break this down into an LIS + disp.
2644  Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2645 
2646  Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2647  MVT::i32);
2648  unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2649  Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2650  return true;
2651  }
2652  }
2653 
2654  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2655  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2656  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2657  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2658  } else
2659  Base = N;
2660  return true; // [r+0]
2661 }
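// Worked example (illustrative): (add r3, -4) is matched here with Disp = -4
// and Base = r3, so a load becomes "lwz r4, -4(r3)"; a plain frame index is
// matched by the fall-through case as [r+0].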
2662 
2663 /// Similar to the 16-bit case but for instructions that take a 34-bit
2664 /// displacement field (prefixed loads/stores).
2665 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2666  SDValue &Base,
2667  SelectionDAG &DAG) const {
2668  // Only on 64-bit targets.
2669  if (N.getValueType() != MVT::i64)
2670  return false;
2671 
2672  SDLoc dl(N);
2673  int64_t Imm = 0;
2674 
2675  if (N.getOpcode() == ISD::ADD) {
2676  if (!isIntS34Immediate(N.getOperand(1), Imm))
2677  return false;
2678  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2679  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2680  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2681  else
2682  Base = N.getOperand(0);
2683  return true;
2684  }
2685 
2686  if (N.getOpcode() == ISD::OR) {
2687  if (!isIntS34Immediate(N.getOperand(1), Imm))
2688  return false;
2689  // If this is an or of disjoint bitfields, we can codegen this as an add
2690  // (for better address arithmetic) if the LHS and RHS of the OR are
2691  // provably disjoint.
2692  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2693  if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2694  return false;
2695  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2696  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2697  else
2698  Base = N.getOperand(0);
2699  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2700  return true;
2701  }
2702 
2703  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2704  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2705  Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2706  return true;
2707  }
2708 
2709  return false;
2710 }
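// Worked example (illustrative): prefixed memory ops carry a 34-bit signed
// displacement, so (add r3, 1048576) can still be selected directly as
// "pld r4, 1048576(r3)" even though the offset far exceeds a D-form's
// 16-bit range.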
2711 
2712 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2713 /// represented as an indexed [r+r] operation.
2714 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2715  SDValue &Index,
2716  SelectionDAG &DAG) const {
2717  // Check to see if we can easily represent this as an [r+r] address. This
2718  // will fail if it thinks that the address is more profitably represented as
2719  // reg+imm, e.g. where imm = 0.
2720  if (SelectAddressRegReg(N, Base, Index, DAG))
2721  return true;
2722 
2723  // If the address is the result of an add, we will utilize the fact that the
2724  // address calculation includes an implicit add. However, we can reduce
2725  // register pressure if we do not materialize a constant just for use as the
2726  // index register. We only fold away the add if it is not an add of a
2727  // value and a 16-bit signed constant where both operands have a single use.
2728  int16_t imm = 0;
2729  if (N.getOpcode() == ISD::ADD &&
2730  (!isIntS16Immediate(N.getOperand(1), imm) ||
2731  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2732  Base = N.getOperand(0);
2733  Index = N.getOperand(1);
2734  return true;
2735  }
2736 
2737  // Otherwise, do it the hard way, using R0 as the base register.
2738  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2739  N.getValueType());
2740  Index = N;
2741  return true;
2742 }
2743 
2744 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2745  Ty *PCRelCand = dyn_cast<Ty>(N);
2746  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2747 }
2748 
2749 /// Returns true if this address is a PC Relative address.
2750 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2751 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2752 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2753  // This is a materialize PC Relative node. Always select this as PC Relative.
2754  Base = N;
2755  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2756  return true;
2757  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2758  isValidPCRelNode<GlobalAddressSDNode>(N) ||
2759  isValidPCRelNode<JumpTableSDNode>(N) ||
2760  isValidPCRelNode<BlockAddressSDNode>(N))
2761  return true;
2762  return false;
2763 }
2764 
2765 /// Returns true if we should use a direct load into vector instruction
2766 /// (such as lxsd or lfd) instead of a load into GPR + direct move sequence.
2767 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2768 
2769  // If there are any uses other than scalar_to_vector, then we should
2770  // keep it as a scalar load -> direct move pattern to prevent multiple
2771  // loads.
2772  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2773  if (!LD)
2774  return false;
2775 
2776  EVT MemVT = LD->getMemoryVT();
2777  if (!MemVT.isSimple())
2778  return false;
2779  switch(MemVT.getSimpleVT().SimpleTy) {
2780  case MVT::i64:
2781  break;
2782  case MVT::i32:
2783  if (!ST.hasP8Vector())
2784  return false;
2785  break;
2786  case MVT::i16:
2787  case MVT::i8:
2788  if (!ST.hasP9Vector())
2789  return false;
2790  break;
2791  default:
2792  return false;
2793  }
2794 
2795  SDValue LoadedVal(N, 0);
2796  if (!LoadedVal.hasOneUse())
2797  return false;
2798 
2799  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2800  UI != UE; ++UI)
2801  if (UI.getUse().get().getResNo() == 0 &&
2802  UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2803  UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2804  return false;
2805 
2806  return true;
2807 }
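// Worked example (illustrative): an i64 load whose only user is a
// scalar_to_vector can be selected as a single lxsd (or lfd) into a vector
// register, which is cheaper than the ld + mtvsrd (load, then direct-move)
// sequence; the P8/P9 checks above gate the narrower i32/i16/i8 variants.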
2808 
2809 /// getPreIndexedAddressParts - returns true by value, base pointer and
2810 /// offset pointer and addressing mode by reference if the node's address
2811 /// can be legally represented as pre-indexed load / store address.
2812 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2813  SDValue &Offset,
2814  ISD::MemIndexedMode &AM,
2815  SelectionDAG &DAG) const {
2816  if (DisablePPCPreinc) return false;
2817 
2818  bool isLoad = true;
2819  SDValue Ptr;
2820  EVT VT;
2821  unsigned Alignment;
2822  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2823  Ptr = LD->getBasePtr();
2824  VT = LD->getMemoryVT();
2825  Alignment = LD->getAlignment();
2826  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2827  Ptr = ST->getBasePtr();
2828  VT = ST->getMemoryVT();
2829  Alignment = ST->getAlignment();
2830  isLoad = false;
2831  } else
2832  return false;
2833 
2834  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2835  // instructions, because we can fold these into a more efficient instruction
2836  // (such as LXSD) instead.
2837  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2838  return false;
2839  }
2840 
2841  // PowerPC doesn't have preinc load/store instructions for vectors
2842  if (VT.isVector())
2843  return false;
2844 
2845  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2846  // Common code will reject creating a pre-inc form if the base pointer
2847  // is a frame index, or if N is a store and the base pointer is either
2848  // the same as or a predecessor of the value being stored. Check for
2849  // those situations here, and try with swapped Base/Offset instead.
2850  bool Swap = false;
2851 
2852  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2853  Swap = true;
2854  else if (!isLoad) {
2855  SDValue Val = cast<StoreSDNode>(N)->getValue();
2856  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2857  Swap = true;
2858  }
2859 
2860  if (Swap)
2861  std::swap(Base, Offset);
2862 
2863  AM = ISD::PRE_INC;
2864  return true;
2865  }
2866 
2867  // For i64, LDU/STDU (DS-form) can only handle immediates that are a multiple of 4.
2868  if (VT != MVT::i64) {
2869  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2870  return false;
2871  } else {
2872  // LDU/STU need an address with at least 4-byte alignment.
2873  if (Alignment < 4)
2874  return false;
2875 
2876  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2877  return false;
2878  }
2879 
2880  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2881  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2882  // sext i32 to i64 when addr mode is r+i.
2883  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2884  LD->getExtensionType() == ISD::SEXTLOAD &&
2885  isa<ConstantSDNode>(Offset))
2886  return false;
2887  }
2888 
2889  AM = ISD::PRE_INC;
2890  return true;
2891 }
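// Worked example (illustrative): when this returns true for a store with
// Offset -16, the selected instruction is the update form
//   stwu r5, -16(r1)
// which stores to r1 - 16 and writes the new address back into r1.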
2892 
2893 //===----------------------------------------------------------------------===//
2894 // LowerOperation implementation
2895 //===----------------------------------------------------------------------===//
2896 
2897 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2898 /// and LoOpFlags to the target MO flags.
2899 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2900  unsigned &HiOpFlags, unsigned &LoOpFlags,
2901  const GlobalValue *GV = nullptr) {
2902  HiOpFlags = PPCII::MO_HA;
2903  LoOpFlags = PPCII::MO_LO;
2904 
2905  // Don't use the pic base if not in PIC relocation model.
2906  if (IsPIC) {
2907  HiOpFlags |= PPCII::MO_PIC_FLAG;
2908  LoOpFlags |= PPCII::MO_PIC_FLAG;
2909  }
2910 }
2911 
2912 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2913  SelectionDAG &DAG) {
2914  SDLoc DL(HiPart);
2915  EVT PtrVT = HiPart.getValueType();
2916  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2917 
2918  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2919  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2920 
2921  // With PIC, the first instruction is actually "GR+hi(&G)".
2922  if (isPIC)
2923  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2924  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2925 
2926  // Generate non-pic code that has direct accesses to the constant pool.
2927  // The address of the global is just (hi(&g)+lo(&g)).
2928  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2929 }
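// Worked example (illustrative): in the non-PIC case the two nodes built
// above select to the classic high/low pair
//   lis  r3, g@ha
//   addi r3, r3, g@l
// where the @ha relocation compensates for the sign of the low 16 bits.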
2930 
2931 static void setUsesTOCBasePtr(MachineFunction &MF) {
2932  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2933  FuncInfo->setUsesTOCBasePtr();
2934 }
2935 
2936 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2937  setUsesTOCBasePtr(DAG.getMachineFunction());
2938 }
2939 
2940 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2941  SDValue GA) const {
2942  const bool Is64Bit = Subtarget.isPPC64();
2943  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2944  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2945  : Subtarget.isAIXABI()
2946  ? DAG.getRegister(PPC::R2, VT)
2947  : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2948  SDValue Ops[] = { GA, Reg };
2949  return DAG.getMemIntrinsicNode(
2950  PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2951  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2952  MaybeAlign(), MachineMemOperand::MOLoad);
2953 }
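// Worked example (illustrative): on 64-bit ELF the TOC_ENTRY node selects to
// a TOC-relative load such as "ld r3, g@toc(r2)", with r2 (X2) holding the
// TOC base materialized above.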
2954 
2955 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2956  SelectionDAG &DAG) const {
2957  EVT PtrVT = Op.getValueType();
2958  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2959  const Constant *C = CP->getConstVal();
2960 
2961  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2962  // The actual address of the GlobalValue is stored in the TOC.
2963  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2964  if (Subtarget.isUsingPCRelativeCalls()) {
2965  SDLoc DL(CP);
2966  EVT Ty = getPointerTy(DAG.getDataLayout());
2967  SDValue ConstPool = DAG.getTargetConstantPool(
2968  C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2969  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2970  }
2971  setUsesTOCBasePtr(DAG);
2972  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2973  return getTOCEntry(DAG, SDLoc(CP), GA);
2974  }
2975 
2976  unsigned MOHiFlag, MOLoFlag;
2977  bool IsPIC = isPositionIndependent();
2978  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2979 
2980  if (IsPIC && Subtarget.isSVR4ABI()) {
2981  SDValue GA =
2982  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2983  return getTOCEntry(DAG, SDLoc(CP), GA);
2984  }
2985 
2986  SDValue CPIHi =
2987  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2988  SDValue CPILo =
2989  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2990  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2991 }
2992 
2993 // For 64-bit PowerPC, prefer the more compact relative encodings.
2994 // This trades 32 bits per jump table entry for one or two instructions
2995 // at the jump site.
2996 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2997  if (isJumpTableRelative())
2998  return MachineJumpTableInfo::EK_LabelDifference32;
2999 
3000  return TargetLowering::getJumpTableEncoding();
3001 }
3002 
3003 bool PPCTargetLowering::isJumpTableRelative() const {
3004  if (UseAbsoluteJumpTables)
3005  return false;
3006  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3007  return true;
3008  return TargetLowering::isJumpTableRelative();
3009 }
3010 
3011 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3012  SelectionDAG &DAG) const {
3013  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3014  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3015 
3016  switch (getTargetMachine().getCodeModel()) {
3017  case CodeModel::Small:
3018  case CodeModel::Medium:
3019  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3020  default:
3021  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3022  getPointerTy(DAG.getDataLayout()));
3023  }
3024 }
3025 
3026 const MCExpr *
3027 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3028  unsigned JTI,
3029  MCContext &Ctx) const {
3030  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3031  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3032 
3033  switch (getTargetMachine().getCodeModel()) {
3034  case CodeModel::Small:
3035  case CodeModel::Medium:
3036  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3037  default:
3038  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3039  }
3040 }
3041 
3042 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3043  EVT PtrVT = Op.getValueType();
3044  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3045 
3046  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3047  if (Subtarget.isUsingPCRelativeCalls()) {
3048  SDLoc DL(JT);
3049  EVT Ty = getPointerTy(DAG.getDataLayout());
3050  SDValue GA =
3051  DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3052  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3053  return MatAddr;
3054  }
3055 
3056  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3057  // The actual address of the GlobalValue is stored in the TOC.
3058  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3059  setUsesTOCBasePtr(DAG);
3060  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3061  return getTOCEntry(DAG, SDLoc(JT), GA);
3062  }
3063 
3064  unsigned MOHiFlag, MOLoFlag;
3065  bool IsPIC = isPositionIndependent();
3066  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3067 
3068  if (IsPIC && Subtarget.isSVR4ABI()) {
3069  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3070  PPCII::MO_PIC_FLAG);
3071  return getTOCEntry(DAG, SDLoc(GA), GA);
3072  }
3073 
3074  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3075  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3076  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3077 }
3078 
3079 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3080  SelectionDAG &DAG) const {
3081  EVT PtrVT = Op.getValueType();
3082  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3083  const BlockAddress *BA = BASDN->getBlockAddress();
3084 
3085  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3086  if (Subtarget.isUsingPCRelativeCalls()) {
3087  SDLoc DL(BASDN);
3088  EVT Ty = getPointerTy(DAG.getDataLayout());
3089  SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3090  PPCII::MO_PCREL_FLAG);
3091  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3092  return MatAddr;
3093  }
3094 
3095  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3096  // The actual BlockAddress is stored in the TOC.
3097  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3098  setUsesTOCBasePtr(DAG);
3099  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3100  return getTOCEntry(DAG, SDLoc(BASDN), GA);
3101  }
3102 
3103  // 32-bit position-independent ELF stores the BlockAddress in the .got.
3104  if (Subtarget.is32BitELFABI() && isPositionIndependent())
3105  return getTOCEntry(
3106  DAG, SDLoc(BASDN),
3107  DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3108 
3109  unsigned MOHiFlag, MOLoFlag;
3110  bool IsPIC = isPositionIndependent();
3111  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3112  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3113  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3114  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3115 }
3116 
3117 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3118  SelectionDAG &DAG) const {
3119  // FIXME: TLS addresses currently use medium model code sequences,
3120  // which is the most useful form. Eventually support for small and
3121  // large models could be added if users need it, at the cost of
3122  // additional complexity.
3123  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3124  if (DAG.getTarget().useEmulatedTLS())
3125  return LowerToTLSEmulatedModel(GA, DAG);
3126 
3127  SDLoc dl(GA);
3128  const GlobalValue *GV = GA->getGlobal();
3129  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3130  bool is64bit = Subtarget.isPPC64();
3131  const Module *M = DAG.getMachineFunction().getFunction().getParent();
3132  PICLevel::Level picLevel = M->getPICLevel();
3133 
3134  const TargetMachine &TM = getTargetMachine();
3135  TLSModel::Model Model = TM.getTLSModel(GV);
3136 
3137  if (Model == TLSModel::LocalExec) {
3138  if (Subtarget.isUsingPCRelativeCalls()) {
3139  SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3140  SDValue TGA = DAG.getTargetGlobalAddress(
3141  GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3142  SDValue MatAddr =
3143  DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3144  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3145  }
3146 
3147  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3148  PPCII::MO_TPREL_HA);
3149  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3150  PPCII::MO_TPREL_LO);
3151  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3152  : DAG.getRegister(PPC::R2, MVT::i32);
3153 
3154  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3155  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3156  }
3157 
3158  if (Model == TLSModel::InitialExec) {
3159  bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3160  SDValue TGA = DAG.getTargetGlobalAddress(
3161  GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3162  SDValue TGATLS = DAG.getTargetGlobalAddress(
3163  GV, dl, PtrVT, 0,
3164  IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3165  SDValue TPOffset;
3166  if (IsPCRel) {
3167  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3168  TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3169  MachinePointerInfo());
3170  } else {
3171  SDValue GOTPtr;
3172  if (is64bit) {
3173  setUsesTOCBasePtr(DAG);
3174  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3175  GOTPtr =
3176  DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3177  } else {
3178  if (!TM.isPositionIndependent())
3179  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3180  else if (picLevel == PICLevel::SmallPIC)
3181  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3182  else
3183  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3184  }
3185  TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3186  }
3187  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3188  }
3189 
3190  if (Model == TLSModel::GeneralDynamic) {
3191  if (Subtarget.isUsingPCRelativeCalls()) {
3192  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3193  PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3194  return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3195  }
3196 
3197  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3198  SDValue GOTPtr;
3199  if (is64bit) {
3200  setUsesTOCBasePtr(DAG);
3201  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3202  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3203  GOTReg, TGA);
3204  } else {
3205  if (picLevel == PICLevel::SmallPIC)
3206  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3207  else
3208  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3209  }
3210  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3211  GOTPtr, TGA, TGA);
3212  }
3213 
3214  if (Model == TLSModel::LocalDynamic) {
3215  if (Subtarget.isUsingPCRelativeCalls()) {
3216  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3217  PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3218  SDValue MatPCRel =
3219  DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3220  return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3221  }
3222 
3223  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3224  SDValue GOTPtr;
3225  if (is64bit) {
3226  setUsesTOCBasePtr(DAG);
3227  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3228  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3229  GOTReg, TGA);
3230  } else {
3231  if (picLevel == PICLevel::SmallPIC)
3232  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3233  else
3234  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3235  }
3236  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3237  PtrVT, GOTPtr, TGA, TGA);
3238  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3239  PtrVT, TLSAddr, TGA);
3240  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3241  }
3242 
3243  llvm_unreachable("Unknown TLS model!");
3244 }
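// Worked example (illustrative): on 64-bit ELF the general-dynamic model
// above lowers to the familiar call sequence
//   addis r3, r2, x@got@tlsgd@ha
//   addi  r3, r3, x@got@tlsgd@l
//   bl    __tls_get_addr(x@tlsgd)
// while the PC-relative paths materialize the argument with a single
// prefixed instruction instead.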
3245 
3246 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3247  SelectionDAG &DAG) const {
3248  EVT PtrVT = Op.getValueType();
3249  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3250  SDLoc DL(GSDN);
3251  const GlobalValue *GV = GSDN->getGlobal();
3252 
3253  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3254  // The actual address of the GlobalValue is stored in the TOC.
3255  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3256  if (Subtarget.isUsingPCRelativeCalls()) {
3257  EVT Ty = getPointerTy(DAG.getDataLayout());
3258  if (isAccessedAsGotIndirect(Op)) {
3259  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3260  PPCII::MO_PCREL_FLAG |
3261  PPCII::MO_GOT_FLAG);
3262  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3263  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3264  MachinePointerInfo());
3265  return Load;
3266  } else {
3267  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3268  PPCII::MO_PCREL_FLAG);
3269  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3270  }
3271  }
3272  setUsesTOCBasePtr(DAG);
3273  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3274  return getTOCEntry(DAG, DL, GA);
3275  }
3276 
3277  unsigned MOHiFlag, MOLoFlag;
3278  bool IsPIC = isPositionIndependent();
3279  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3280 
3281  if (IsPIC && Subtarget.isSVR4ABI()) {
3282  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3283  GSDN->getOffset(),
3284  PPCII::MO_PIC_FLAG);
3285  return getTOCEntry(DAG, DL, GA);
3286  }
3287 
3288  SDValue GAHi =
3289  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3290  SDValue GALo =
3291  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3292 
3293  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3294 }
3295 
3296 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3297  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3298  SDLoc dl(Op);
3299 
3300  if (Op.getValueType() == MVT::v2i64) {
3301  // When the operands themselves are v2i64 values, we need to do something
3302  // special because VSX has no underlying comparison operations for these.
3303  if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3304  // Equality can be handled by casting to the legal type for Altivec
3305  // comparisons, everything else needs to be expanded.
3306  if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3307  return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3308  DAG.getSetCC(dl, MVT::v4i32,
3309  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3310  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3311  CC));
3312  }
3313 
3314  return SDValue();
3315  }
3316 
3317  // We handle most of these in the usual way.
3318  return Op;
3319  }
3320 
3321  // If we're comparing for equality to zero, expose the fact that this is
3322  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3323  // fold the new nodes.
3324  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3325  return V;
3326 
3327  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3328  // Leave comparisons against 0 and -1 alone for now, since they're usually
3329  // optimized. FIXME: revisit this when we can custom lower all setcc
3330  // optimizations.
3331  if (C->isAllOnesValue() || C->isNullValue())
3332  return SDValue();
3333  }
3334 
3335  // If we have an integer seteq/setne, turn it into a compare against zero
3336  // by xor'ing the rhs with the lhs, which is faster than setting a
3337  // condition register, reading it back out, and masking the correct bit. The
3338  // normal approach here uses sub to do this instead of xor. Using xor exposes
3339  // the result to other bit-twiddling opportunities.
3340  EVT LHSVT = Op.getOperand(0).getValueType();
3341  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3342  EVT VT = Op.getValueType();
3343  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3344  Op.getOperand(1));
3345  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3346  }
3347  return SDValue();
3348 }
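// Worked example (illustrative): for i32 operands, (a == b) lowered through
// lowerCmpEqZeroToCtlzSrl becomes
//   xor    r5, r3, r4
//   cntlzw r5, r5
//   srwi   r5, r5, 5
// since cntlzw yields 32 (and thus 1 after the shift) exactly when the xor
// result is zero.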
3349 
3350 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3351  SDNode *Node = Op.getNode();
3352  EVT VT = Node->getValueType(0);
3353  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3354  SDValue InChain = Node->getOperand(0);
3355  SDValue VAListPtr = Node->getOperand(1);
3356  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3357  SDLoc dl(Node);
3358 
3359  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3360 
3361  // gpr_index
3362  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3363  VAListPtr, MachinePointerInfo(SV), MVT::i8);
3364  InChain = GprIndex.getValue(1);
3365 
3366  if (VT == MVT::i64) {
3367  // Check if GprIndex is even
3368  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3369  DAG.getConstant(1, dl, MVT::i32));
3370  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3371  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3372  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3373  DAG.getConstant(1, dl, MVT::i32));
3374  // Align GprIndex to be even if it isn't
3375  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3376  GprIndex);
3377  }
3378 
3379  // fpr index is 1 byte after gpr
3380  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3381  DAG.getConstant(1, dl, MVT::i32));
3382 
3383  // fpr
3384  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3385  FprPtr, MachinePointerInfo(SV), MVT::i8);
3386  InChain = FprIndex.getValue(1);
3387 
3388  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3389  DAG.getConstant(8, dl, MVT::i32));
3390 
3391  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3392  DAG.getConstant(4, dl, MVT::i32));
3393 
3394  // areas
3395  SDValue OverflowArea =
3396  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3397  InChain = OverflowArea.getValue(1);
3398 
3399  SDValue RegSaveArea =
3400  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3401  InChain = RegSaveArea.getValue(1);
3402 
3403  // select overflow_area if index >= 8
3404  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3405  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3406 
3407  // adjustment constant gpr_index * 4/8
3408  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3409  VT.isInteger() ? GprIndex : FprIndex,
3410  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3411  MVT::i32));
3412 
3413  // OurReg = RegSaveArea + RegConstant
3414  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3415  RegConstant);
3416 
3417  // Floating types are 32 bytes into RegSaveArea
3418  if (VT.isFloatingPoint())
3419  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3420  DAG.getConstant(32, dl, MVT::i32));
3421 
3422  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3423  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3424  VT.isInteger() ? GprIndex : FprIndex,
3425  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3426  MVT::i32));
3427 
3428  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3429  VT.isInteger() ? VAListPtr : FprPtr,
3430  MachinePointerInfo(SV), MVT::i8);
3431 
3432  // determine if we should load from reg_save_area or overflow_area
3433  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3434 
3435  // increase overflow_area by 4/8 if gpr/fpr index >= 8
3436  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3437  DAG.getConstant(VT.isInteger() ? 4 : 8,
3438  dl, MVT::i32));
3439 
3440  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3441  OverflowAreaPlusN);
3442 
3443  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3444  MachinePointerInfo(SV), MVT::i32);
3445 
3446  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3447 }
3448 
3449 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3450  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3451 
3452  // We have to copy the entire va_list struct:
3453  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes.
3454  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3455  DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3456  false, true, false, MachinePointerInfo(),
3457  MachinePointerInfo());
3458 }
3459 
3460 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3461  SelectionDAG &DAG) const {
3462  if (Subtarget.isAIXABI())
3463  report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3464 
3465  return Op.getOperand(0);
3466 }
3467 
3468 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3469  MachineFunction &MF = DAG.getMachineFunction();
3470  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3471 
3472  assert((Op.getOpcode() == ISD::INLINEASM ||
3473  Op.getOpcode() == ISD::INLINEASM_BR) &&
3474  "Expecting Inline ASM node.");
3475 
3476  // If an LR store is already known to be required then there is no point in
3477  // checking this ASM as well.
3478  if (MFI.isLRStoreRequired())
3479  return Op;
3480 
3481  // Inline ASM nodes have an optional last operand that is an incoming Flag of
3482  // type MVT::Glue. We want to ignore this last operand if that is the case.
3483  unsigned NumOps = Op.getNumOperands();
3484  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3485  --NumOps;
3486 
3487  // Check all operands that may contain the LR.
3488  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3489  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3490  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3491  ++i; // Skip the ID value.
3492 
3493  switch (InlineAsm::getKind(Flags)) {
3494  default:
3495  llvm_unreachable("Bad flags!");
3496  case InlineAsm::Kind_RegUse:
3497  case InlineAsm::Kind_Imm:
3498  case InlineAsm::Kind_Mem:
3499  i += NumVals;
3500  break;
3501  case InlineAsm::Kind_Clobber:
3502  case InlineAsm::Kind_RegDef:
3503  case InlineAsm::Kind_RegDefEarlyClobber: {
3504  for (; NumVals; --NumVals, ++i) {
3505  Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3506  if (Reg != PPC::LR && Reg != PPC::LR8)
3507  continue;
3508  MFI.setLRStoreRequired();
3509  return Op;
3510  }
3511  break;
3512  }
3513  }
3514  }
3515 
3516  return Op;
3517 }
3518 
3519 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3520  SelectionDAG &DAG) const {
3521  if (Subtarget.isAIXABI())
3522  report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3523 
3524  SDValue Chain = Op.getOperand(0);
3525  SDValue Trmp = Op.getOperand(1); // trampoline
3526  SDValue FPtr = Op.getOperand(2); // nested function
3527  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3528  SDLoc dl(Op);
3529 
3530  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531  bool isPPC64 = (PtrVT == MVT::i64);
3532  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3533 
3534  TargetLowering::ArgListTy Args;
3535  TargetLowering::ArgListEntry Entry;
3536 
3537  Entry.Ty = IntPtrTy;
3538  Entry.Node = Trmp; Args.push_back(Entry);
3539 
3540  // TrampSize == (isPPC64 ? 48 : 40);
3541  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3542  isPPC64 ? MVT::i64 : MVT::i32);
3543  Args.push_back(Entry);
3544 
3545  Entry.Node = FPtr; Args.push_back(Entry);
3546  Entry.Node = Nest; Args.push_back(Entry);
3547 
3548  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3549  TargetLowering::CallLoweringInfo CLI(DAG);
3550  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3551  CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3552  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3553 
3554  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3555  return CallResult.second;
3556 }
3557 
3558 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3559  MachineFunction &MF = DAG.getMachineFunction();
3560  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3561  EVT PtrVT = getPointerTy(MF.getDataLayout());
3562 
3563  SDLoc dl(Op);
3564 
3565  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3566  // vastart just stores the address of the VarArgsFrameIndex slot into the
3567  // memory location argument.
3568  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3569  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3570  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3571  MachinePointerInfo(SV));
3572  }
3573 
3574  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3575  // We suppose the given va_list is already allocated.
3576  //
3577  // typedef struct {
3578  // char gpr; /* index into the array of 8 GPRs
3579  // * stored in the register save area
3580  // * gpr=0 corresponds to r3,
3581  // * gpr=1 to r4, etc.
3582  // */
3583  // char fpr; /* index into the array of 8 FPRs
3584  // * stored in the register save area
3585  // * fpr=0 corresponds to f1,
3586  // * fpr=1 to f2, etc.
3587  // */
3588  // char *overflow_arg_area;
3589  // /* location on stack that holds
3590  // * the next overflow argument
3591  // */
3592  // char *reg_save_area;
3593  // /* where r3:r10 and f1:f8 (if saved)
3594  // * are stored
3595  // */
3596  // } va_list[1];
3597 
3598  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3599  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3600  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3601  PtrVT);
3602  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3603  PtrVT);
3604 
3605  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3606  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3607 
3608  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3609  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3610 
3611  uint64_t FPROffset = 1;
3612  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3613 
3614  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3615 
3616  // Store first byte : number of int regs
3617  SDValue firstStore =
3618  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3619  MachinePointerInfo(SV), MVT::i8);
3620  uint64_t nextOffset = FPROffset;
3621  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3622  ConstFPROffset);
3623 
3624  // Store second byte : number of float regs
3625  SDValue secondStore =
3626  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3627  MachinePointerInfo(SV, nextOffset), MVT::i8);
3628  nextOffset += StackOffset;
3629  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3630 
3631  // Store second word : arguments given on stack
3632  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3633  MachinePointerInfo(SV, nextOffset));
3634  nextOffset += FrameOffset;
3635  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3636 
3637  // Store third word : arguments given in registers
3638  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3639  MachinePointerInfo(SV, nextOffset));
3640 }
3641 
3642 /// FPR - The set of FP registers that should be allocated for arguments
3643 /// on Darwin and AIX.
3644 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3645  PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
3646  PPC::F11, PPC::F12, PPC::F13};
3647 
3648 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3649 /// the stack.
3650 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3651  unsigned PtrByteSize) {
3652  unsigned ArgSize = ArgVT.getStoreSize();
3653  if (Flags.isByVal())
3654  ArgSize = Flags.getByValSize();
3655 
3656  // Round up to multiples of the pointer size, except for array members,
3657  // which are always packed.
3658  if (!Flags.isInConsecutiveRegs())
3659  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3660 
3661  return ArgSize;
3662 }
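// Worked example (illustrative): with 8-byte pointers, a 13-byte byval
// argument reserves 16 bytes (rounded up to the pointer size), while the
// same 13 bytes inside a split array member stay packed at 13.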
3663 
3664 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3665 /// on the stack.
3666 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3667  ISD::ArgFlagsTy Flags,
3668  unsigned PtrByteSize) {
3669  Align Alignment(PtrByteSize);
3670 
3671  // Altivec parameters are padded to a 16 byte boundary.
3672  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3673  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3674  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3675  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3676  Alignment = Align(16);
3677 
3678  // ByVal parameters are aligned as requested.
3679  if (Flags.isByVal()) {
3680  auto BVAlign = Flags.getNonZeroByValAlign();
3681  if (BVAlign > PtrByteSize) {
3682  if (BVAlign.value() % PtrByteSize != 0)
3684  "ByVal alignment is not a multiple of the pointer size");
3685 
3686  Alignment = BVAlign;
3687  }
3688  }
3689 
3690  // Array members are always packed to their original alignment.
3691  if (Flags.isInConsecutiveRegs()) {
3692  // If the array member was split into multiple registers, the first
3693  // needs to be aligned to the size of the full type. (Except for
3694  // ppcf128, which is only aligned as its f64 components.)
3695  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3696  Alignment = Align(OrigVT.getStoreSize());
3697  else
3698  Alignment = Align(ArgVT.getStoreSize());
3699  }
3700 
3701  return Alignment;
3702 }
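// Worked example (illustrative): a v4f32 argument is padded to Align(16)
// here, and a byval argument that requests 32-byte alignment on a 64-bit
// target gets Align(32), since 32 is a multiple of the 8-byte pointer size.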
3703 
3704 /// CalculateStackSlotUsed - Return whether this argument will use its
3705 /// stack slot (instead of being passed in registers). ArgOffset,
3706 /// AvailableFPRs, and AvailableVRs must hold the current argument
3707 /// position, and will be updated to account for this argument.
3708 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3709  unsigned PtrByteSize, unsigned LinkageSize,
3710  unsigned ParamAreaSize, unsigned &ArgOffset,
3711  unsigned &AvailableFPRs,
3712  unsigned &AvailableVRs) {
3713  bool UseMemory = false;
3714 
3715  // Respect alignment of argument on the stack.
3716  Align Alignment =
3717  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3718  ArgOffset = alignTo(ArgOffset, Alignment);
3719  // If there's no space left in the argument save area, we must
3720  // use memory (this check also catches zero-sized arguments).
3721  if (ArgOffset >= LinkageSize + ParamAreaSize)
3722  UseMemory = true;
3723 
3724  // Allocate argument on the stack.
3725  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3726  if (Flags.isInConsecutiveRegsLast())
3727  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3728  // If we overran the argument save area, we must use memory
3729  // (this check catches arguments passed partially in memory)
3730  if (ArgOffset > LinkageSize + ParamAreaSize)
3731  UseMemory = true;
3732 
3733  // However, if the argument is actually passed in an FPR or a VR,
3734  // we don't use memory after all.
3735  if (!Flags.isByVal()) {
3736  if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3737  if (AvailableFPRs > 0) {
3738  --AvailableFPRs;
3739  return false;
3740  }
3741  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3742  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3743  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3744  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3745  if (AvailableVRs > 0) {
3746  --AvailableVRs;
3747  return false;
3748  }
3749  }
3750 
3751  return UseMemory;
3752 }
3753 
3754 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3755 /// ensure minimum alignment required for target.
3756 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3757  unsigned NumBytes) {
3758  return alignTo(NumBytes, Lowering->getStackAlign());
3759 }
3760 
3761 SDValue PPCTargetLowering::LowerFormalArguments(
3762  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3763  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3764  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3765  if (Subtarget.isAIXABI())
3766  return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3767  InVals);
3768  if (Subtarget.is64BitELFABI())
3769  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3770  InVals);
3771  assert(Subtarget.is32BitELFABI());
3772  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3773  InVals);
3774 }
3775 
3776 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3777  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3778  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3779  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3780 
3781  // 32-bit SVR4 ABI Stack Frame Layout:
3782  // +-----------------------------------+
3783  // +--> | Back chain |
3784  // | +-----------------------------------+
3785  // | | Floating-point register save area |
3786  // | +-----------------------------------+
3787  // | | General register save area |
3788  // | +-----------------------------------+
3789  // | | CR save word |
3790  // | +-----------------------------------+
3791  // | | VRSAVE save word |
3792  // | +-----------------------------------+
3793  // | | Alignment padding |
3794  // | +-----------------------------------+
3795  // | | Vector register save area |
3796  // | +-----------------------------------+
3797  // | | Local variable space |
3798  // | +-----------------------------------+
3799  // | | Parameter list area |
3800  // | +-----------------------------------+
3801  // | | LR save word |
3802  // | +-----------------------------------+
3803  // SP--> +--- | Back chain |
3804  // +-----------------------------------+
3805  //
3806  // Specifications:
3807  // System V Application Binary Interface PowerPC Processor Supplement
3808  // AltiVec Technology Programming Interface Manual
3809 
3810  MachineFunction &MF = DAG.getMachineFunction();
3811  MachineFrameInfo &MFI = MF.getFrameInfo();
3812  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3813 
3814  EVT PtrVT = getPointerTy(MF.getDataLayout());
3815  // Potential tail calls could cause overwriting of argument stack slots.
3816  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3817  (CallConv == CallingConv::Fast));
3818  const Align PtrAlign(4);
3819 
3820  // Assign locations to all of the incoming arguments.
3821  SmallVector<CCValAssign, 16> ArgLocs;
3822  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3823  *DAG.getContext());
3824 
3825  // Reserve space for the linkage area on the stack.
3826  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3827  CCInfo.AllocateStack(LinkageSize, PtrAlign);
3828  if (useSoftFloat())
3829  CCInfo.PreAnalyzeFormalArguments(Ins);
3830 
3831  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3832  CCInfo.clearWasPPCF128();
3833 
3834  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3835  CCValAssign &VA = ArgLocs[i];
3836 
3837  // Arguments stored in registers.
3838  if (VA.isRegLoc()) {
3839  const TargetRegisterClass *RC;
3840  EVT ValVT = VA.getValVT();
3841 
3842  switch (ValVT.getSimpleVT().SimpleTy) {
3843  default:
3844  llvm_unreachable("ValVT not supported by formal arguments Lowering");
3845  case MVT::i1:
3846  case MVT::i32:
3847  RC = &PPC::GPRCRegClass;
3848  break;
3849  case MVT::f32:
3850  if (Subtarget.hasP8Vector())
3851  RC = &PPC::VSSRCRegClass;
3852  else if (Subtarget.hasSPE())
3853  RC = &PPC::GPRCRegClass;
3854  else
3855  RC = &PPC::F4RCRegClass;
3856  break;
3857  case MVT::f64:
3858  if (Subtarget.hasVSX())
3859  RC = &PPC::VSFRCRegClass;
3860  else if (Subtarget.hasSPE())
3861  // SPE passes doubles in GPR pairs.
3862  RC = &PPC::GPRCRegClass;
3863  else
3864  RC = &PPC::F8RCRegClass;
3865  break;
3866  case MVT::v16i8:
3867  case MVT::v8i16:
3868  case MVT::v4i32:
3869  RC = &PPC::VRRCRegClass;
3870  break;
3871  case MVT::v4f32:
3872  RC = &PPC::VRRCRegClass;
3873  break;
3874  case MVT::v2f64:
3875  case MVT::v2i64:
3876  RC = &PPC::VRRCRegClass;
3877  break;
3878  }
3879 
3880  SDValue ArgValue;
3881  // Transform the arguments stored in physical registers into
3882  // virtual ones.
3883  if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3884  assert(i + 1 < e && "No second half of double precision argument");
3885  unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3886  unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3887  SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3888  SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3889  if (!Subtarget.isLittleEndian())
3890  std::swap (ArgValueLo, ArgValueHi);
3891  ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3892  ArgValueHi);
3893  } else {
3894  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3895  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3896  ValVT == MVT::i1 ? MVT::i32 : ValVT);
3897  if (ValVT == MVT::i1)
3898  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3899  }
3900 
3901  InVals.push_back(ArgValue);
3902  } else {
3903  // Argument stored in memory.
3904  assert(VA.isMemLoc());
3905 
3906  // Get the extended size of the argument type on the stack
3907  unsigned ArgSize = VA.getLocVT().getStoreSize();
3908  // Get the actual size of the argument type
3909  unsigned ObjSize = VA.getValVT().getStoreSize();
3910  unsigned ArgOffset = VA.getLocMemOffset();
3911  // Stack objects in PPC32 are right justified.
3912  ArgOffset += ArgSize - ObjSize;
3913  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3914 
3915  // Create load nodes to retrieve arguments from the stack.
3916  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3917  InVals.push_back(
3918  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3919  }
3920  }
3921 
3922  // Assign locations to all of the incoming aggregate by value arguments.
3923  // Aggregates passed by value are stored in the local variable space of the
3924  // caller's stack frame, right above the parameter list area.
3925  SmallVector<CCValAssign, 16> ByValArgLocs;
3926  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3927  ByValArgLocs, *DAG.getContext());
3928 
3929  // Reserve stack space for the allocations in CCInfo.
3930  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3931 
3932  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3933 
3934  // Area that is at least reserved in the caller of this function.
3935  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3936  MinReservedArea = std::max(MinReservedArea, LinkageSize);
3937 
3938  // Set the size that is at least reserved in the caller of this function. Tail
3939  // call optimized function's reserved stack space needs to be aligned so that
3940  // taking the difference between two stack areas will result in an aligned
3941  // stack.
3942  MinReservedArea =
3943  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3944  FuncInfo->setMinReservedArea(MinReservedArea);
3945 
3946  SmallVector<SDValue, 8> MemOps;
3947 
3948  // If the function takes a variable number of arguments, make a frame index for
3949  // the start of the first vararg value... for expansion of llvm.va_start.
3950  if (isVarArg) {
3951  static const MCPhysReg GPArgRegs[] = {
3952  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3953  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3954  };
3955  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3956 
3957  static const MCPhysReg FPArgRegs[] = {
3958  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3959  PPC::F8
3960  };
3961  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3962 
3963  if (useSoftFloat() || hasSPE())
3964  NumFPArgRegs = 0;
3965 
3966  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3967  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3968 
3969  // Make room for NumGPArgRegs and NumFPArgRegs.
3970  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3971  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3972 
3973  FuncInfo->setVarArgsStackOffset(
3974  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3975  CCInfo.getNextStackOffset(), true));
3976 
3977  FuncInfo->setVarArgsFrameIndex(
3978  MFI.CreateStackObject(Depth, Align(8), false));
3979  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3980 
3981  // The fixed integer arguments of a variadic function are stored to the
3982  // VarArgsFrameIndex on the stack so that they may be loaded by
3983  // dereferencing the result of va_next.
3984  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3985  // Get an existing live-in vreg, or add a new one.
3986  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3987  if (!VReg)
3988  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3989 
3990  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3991  SDValue Store =
3992  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3993  MemOps.push_back(Store);
3994  // Increment the address by four for the next argument to store
3995  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3996  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3997  }
3998 
3999  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4000  // is set.
4001  // The double arguments are stored to the VarArgsFrameIndex
4002  // on the stack.
4003  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4004  // Get an existing live-in vreg, or add a new one.
4005  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4006  if (!VReg)
4007  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4008 
4009  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4010  SDValue Store =
4011  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4012  MemOps.push_back(Store);
4013  // Increment the address by eight for the next argument to store
4014  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4015  PtrVT);
4016  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4017  }
4018  }
4019 
4020  if (!MemOps.empty())
4021  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4022 
4023  return Chain;
4024 }
4025 
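// [Editor's note] A host-side sketch of what the PPCISD::BUILD_SPE64 path in
// LowerFormalArguments_32SVR4 models: an f64 arriving in two 32-bit GPRs is
// reassembled from its halves, with the halves swapped on big-endian targets
// (mirroring the std::swap above). Assumes the low word is the first operand
// on little-endian; illustrative only, not DAG code.
#include <cstdint>
#include <cstring>
#include <utility>

static double buildSPE64Sketch(uint32_t Lo, uint32_t Hi, bool LittleEndian) {
  if (!LittleEndian)
    std::swap(Lo, Hi);
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // Hi word forms the upper half
  double D;
  std::memcpy(&D, &Bits, sizeof D); // bit-preserving cast, like ISD::BITCAST
  return D;
}
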
4026 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4027 // value to MVT::i64 and then truncate to the correct register size.
4028 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4029  EVT ObjectVT, SelectionDAG &DAG,
4030  SDValue ArgVal,
4031  const SDLoc &dl) const {
4032  if (Flags.isSExt())
4033  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4034  DAG.getValueType(ObjectVT));
4035  else if (Flags.isZExt())
4036  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4037  DAG.getValueType(ObjectVT));
4038 
4039  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4040 }
4041 
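// [Editor's note] The scalar meaning of extendArgForPPC64, sketched with host
// integers: a narrow value arrives widened in a 64-bit register, the
// AssertSext/AssertZext nodes merely record which extension the ABI already
// performed (they emit no code), and the truncate recovers the original
// width. Hypothetical helper, illustration only.
#include <cstdint>

static int32_t receiveI32FromI64(uint64_t Reg64, bool CallerSignExtended) {
  // Nothing to compute here: whichever extension the caller performed, the
  // low 32 bits already hold the value, so truncation is a free re-labeling.
  (void)CallerSignExtended;
  return static_cast<int32_t>(Reg64); // ISD::TRUNCATE to ObjectVT
}
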
4042 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4043  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4044  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4045  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4046  // TODO: add description of PPC stack frame format, or at least some docs.
4047  //
4048  bool isELFv2ABI = Subtarget.isELFv2ABI();
4049  bool isLittleEndian = Subtarget.isLittleEndian();
4050  MachineFunction &MF = DAG.getMachineFunction();
4051  MachineFrameInfo &MFI = MF.getFrameInfo();
4052  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4053 
4054  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4055  "fastcc not supported on varargs functions");
4056 
4057  EVT PtrVT = getPointerTy(MF.getDataLayout());
4058  // Potential tail calls could cause overwriting of argument stack slots.
4059  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4060  (CallConv == CallingConv::Fast));
4061  unsigned PtrByteSize = 8;
4062  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4063 
4064  static const MCPhysReg GPR[] = {
4065  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4066  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4067  };
4068  static const MCPhysReg VR[] = {
4069  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4070  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4071  };
4072 
4073  const unsigned Num_GPR_Regs = array_lengthof(GPR);
4074  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4075  const unsigned Num_VR_Regs = array_lengthof(VR);
4076 
4077  // Do a first pass over the arguments to determine whether the ABI
4078  // guarantees that our caller has allocated the parameter save area
4079  // on its stack frame. In the ELFv1 ABI, this is always the case;
4080  // in the ELFv2 ABI, it is true if this is a vararg function or if
4081  // any parameter is located in a stack slot.
4082 
4083  bool HasParameterArea = !isELFv2ABI || isVarArg;
4084  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4085  unsigned NumBytes = LinkageSize;
4086  unsigned AvailableFPRs = Num_FPR_Regs;
4087  unsigned AvailableVRs = Num_VR_Regs;
4088  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4089  if (Ins[i].Flags.isNest())
4090  continue;
4091 
4092  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4093  PtrByteSize, LinkageSize, ParamAreaSize,
4094  NumBytes, AvailableFPRs, AvailableVRs))
4095  HasParameterArea = true;
4096  }
4097 
4098  // Add DAG nodes to load the arguments or copy them out of registers. On
4099  // entry to a function on PPC, the arguments start after the linkage area,
4100  // although the first ones are often in registers.
4101 
4102  unsigned ArgOffset = LinkageSize;
4103  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4104  SmallVector<SDValue, 8> MemOps;
4105  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4106  unsigned CurArgIdx = 0;
4107  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4108  SDValue ArgVal;
4109  bool needsLoad = false;
4110  EVT ObjectVT = Ins[ArgNo].VT;
4111  EVT OrigVT = Ins[ArgNo].ArgVT;
4112  unsigned ObjSize = ObjectVT.getStoreSize();
4113  unsigned ArgSize = ObjSize;
4114  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4115  if (Ins[ArgNo].isOrigArg()) {
4116  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4117  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4118  }
4119  // We re-align the argument offset for each argument, except under the
4120  // fast calling convention, where we must do so only when the argument
4121  // will actually use a stack slot.
4122  unsigned CurArgOffset;
4123  Align Alignment;
4124  auto ComputeArgOffset = [&]() {
4125  /* Respect alignment of argument on the stack. */
4126  Alignment =
4127  CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4128  ArgOffset = alignTo(ArgOffset, Alignment);
4129  CurArgOffset = ArgOffset;
4130  };
4131 
4132  if (CallConv != CallingConv::Fast) {
4133  ComputeArgOffset();
4134 
4135  /* Compute GPR index associated with argument offset. */
4136  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4137  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4138  }
4139 
4140  // FIXME the codegen can be much improved in some cases.
4141  // We do not have to keep everything in memory.
4142  if (Flags.isByVal()) {
4143  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4144 
4145  if (CallConv == CallingConv::Fast)
4146  ComputeArgOffset();
4147 
4148  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4149  ObjSize = Flags.getByValSize();
4150  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4151  // Empty aggregate parameters do not take up registers. Examples:
4152  // struct { } a;
4153  // union { } b;
4154  // int c[0];
4155  // etc. However, we have to provide a place-holder in InVals, so
4156  // pretend we have an 8-byte item at the current address for that
4157  // purpose.
4158  if (!ObjSize) {
4159  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4160  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4161  InVals.push_back(FIN);
4162  continue;
4163  }
4164 
4165  // Create a stack object covering all stack doublewords occupied
4166  // by the argument. If the argument is (fully or partially) on
4167  // the stack, or if the argument is fully in registers but the
4168  // caller has allocated the parameter save anyway, we can refer
4169  // directly to the caller's stack frame. Otherwise, create a
4170  // local copy in our own frame.
4171  int FI;
4172  if (HasParameterArea ||
4173  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4174  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4175  else
4176  FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4177  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4178 
4179  // Handle aggregates smaller than 8 bytes.
4180  if (ObjSize < PtrByteSize) {
4181  // The value of the object is its address, which differs from the
4182  // address of the enclosing doubleword on big-endian systems.
4183  SDValue Arg = FIN;
4184  if (!isLittleEndian) {
4185  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4186  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4187  }
4188  InVals.push_back(Arg);
4189 
4190  if (GPR_idx != Num_GPR_Regs) {
4191  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4192  FuncInfo->addLiveInAttr(VReg, Flags);
4193  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4194  SDValue Store;
4195 
4196  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4197  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4198  (ObjSize == 2 ? MVT::i16 : MVT::i32));
4199  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4200  MachinePointerInfo(&*FuncArg), ObjType);
4201  } else {
4202  // For sizes that don't fit a truncating store (3, 5, 6, 7),
4203  // store the whole register as-is to the parameter save area
4204  // slot.
4205  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4206  MachinePointerInfo(&*FuncArg));
4207  }
4208 
4209  MemOps.push_back(Store);
4210  }
4211  // Whether we copied from a register or not, advance the offset
4212  // into the parameter save area by a full doubleword.
4213  ArgOffset += PtrByteSize;
4214  continue;
4215  }
4216 
4217  // The value of the object is its address, which is the address of
4218  // its first stack doubleword.
4219  InVals.push_back(FIN);
4220 
4221  // Store whatever pieces of the object are in registers to memory.
4222  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4223  if (GPR_idx == Num_GPR_Regs)
4224  break;
4225 
4226  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4227  FuncInfo->addLiveInAttr(VReg, Flags);
4228  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4229  SDValue Addr = FIN;
4230  if (j) {
4231  SDValue Off = DAG.getConstant(j, dl, PtrVT);
4232  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4233  }
4234  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4235  MachinePointerInfo(&*FuncArg, j));
4236  MemOps.push_back(Store);
4237  ++GPR_idx;
4238  }
4239  ArgOffset += ArgSize;
4240  continue;
4241  }
4242 
4243  switch (ObjectVT.getSimpleVT().SimpleTy) {
4244  default: llvm_unreachable("Unhandled argument type!");
4245  case MVT::i1:
4246  case MVT::i32:
4247  case MVT::i64:
4248  if (Flags.isNest()) {
4249  // The 'nest' parameter, if any, is passed in R11.
4250  unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4251  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4252 
4253  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4254  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4255 
4256  break;
4257  }
4258 
4259  // These can be scalar arguments or elements of an integer array type
4260  // passed directly. Clang may use those instead of "byval" aggregate
4261  // types to avoid forcing arguments to memory unnecessarily.
4262  if (GPR_idx != Num_GPR_Regs) {
4263  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4264  FuncInfo->addLiveInAttr(VReg, Flags);
4265  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4266 
4267  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4268  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4269  // value to MVT::i64 and then truncate to the correct register size.
4270  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4271  } else {
4272  if (CallConv == CallingConv::Fast)
4273  ComputeArgOffset();
4274 
4275  needsLoad = true;
4276  ArgSize = PtrByteSize;
4277  }
4278  if (CallConv != CallingConv::Fast || needsLoad)
4279  ArgOffset += 8;
4280  break;
4281 
4282  case MVT::f32:
4283  case MVT::f64:
4284  // These can be scalar arguments or elements of a float array type
4285  // passed directly. The latter are used to implement ELFv2 homogenous
4286  // float aggregates.
4287  if (FPR_idx != Num_FPR_Regs) {
4288  unsigned VReg;
4289 
4290  if (ObjectVT == MVT::f32)
4291  VReg = MF.addLiveIn(FPR[FPR_idx],
4292  Subtarget.hasP8Vector()
4293  ? &PPC::VSSRCRegClass
4294  : &PPC::F4RCRegClass);
4295  else
4296  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4297  ? &PPC::VSFRCRegClass
4298  : &PPC::F8RCRegClass);
4299 
4300  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4301  ++FPR_idx;
4302  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4303  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4304  // once we support fp <-> gpr moves.
4305 
4306  // This can only ever happen in the presence of f32 array types,
4307  // since otherwise we never run out of FPRs before running out
4308  // of GPRs.
4309  unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4310  FuncInfo->addLiveInAttr(VReg, Flags);
4311  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4312 
4313  if (ObjectVT == MVT::f32) {
4314  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4315  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4316  DAG.getConstant(32, dl, MVT::i32));
4317  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4318  }
4319 
4320  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4321  } else {
4322  if (CallConv == CallingConv::Fast)
4323  ComputeArgOffset();
4324 
4325  needsLoad = true;
4326  }
4327 
4328  // When passing an array of floats, the array occupies consecutive
4329  // space in the argument area; only round up to the next doubleword
4330  // at the end of the array. Otherwise, each float takes 8 bytes.
4331  if (CallConv != CallingConv::Fast || needsLoad) {
4332  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4333  ArgOffset += ArgSize;
4334  if (Flags.isInConsecutiveRegsLast())
4335  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4336  }
4337  break;
4338  case MVT::v4f32:
4339  case MVT::v4i32:
4340  case MVT::v8i16:
4341  case MVT::v16i8:
4342  case MVT::v2f64:
4343  case MVT::v2i64:
4344  case MVT::v1i128:
4345  case MVT::f128:
4346  // These can be scalar arguments or elements of a vector array type
4347  // passed directly. The latter are used to implement ELFv2 homogenous
4348  // vector aggregates.
4349  if (VR_idx != Num_VR_Regs) {
4350  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4351  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4352  ++VR_idx;
4353  } else {
4354  if (CallConv == CallingConv::Fast)
4355  ComputeArgOffset();
4356  needsLoad = true;
4357  }
4358  if (CallConv != CallingConv::Fast || needsLoad)
4359  ArgOffset += 16;
4360  break;
4361  }
4362 
4363  // We need to load the argument to a virtual register if we determined
4364  // above that we ran out of physical registers of the appropriate type.
4365  if (needsLoad) {
4366  if (ObjSize < ArgSize && !isLittleEndian)
4367  CurArgOffset += ArgSize - ObjSize;
4368  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4369  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4370  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4371  }
4372 
4373  InVals.push_back(ArgVal);
4374  }
4375 
4376  // Area that is at least reserved in the caller of this function.
4377  unsigned MinReservedArea;
4378  if (HasParameterArea)
4379  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4380  else
4381  MinReservedArea = LinkageSize;
4382 
4383  // Set the size that is at least reserved in the caller of this function. Tail
4384  // call optimized functions' reserved stack space needs to be aligned so that
4385  // taking the difference between two stack areas will result in an aligned
4386  // stack.
4387  MinReservedArea =
4388  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4389  FuncInfo->setMinReservedArea(MinReservedArea);
4390 
4391  // If the function takes a variable number of arguments, make a frame index
4392  // for the start of the first vararg value... for expansion of llvm.va_start.
4393  // The ELFv2 ABI spec notes:
4394  // C programs that are intended to be *portable* across different compilers
4395  // and architectures must use the header file <stdarg.h> to deal with variable
4396  // argument lists.
4397  if (isVarArg && MFI.hasVAStart()) {
4398  int Depth = ArgOffset;
4399 
4400  FuncInfo->setVarArgsFrameIndex(
4401  MFI.CreateFixedObject(PtrByteSize, Depth, true));
4402  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4403 
4404  // If this function is vararg, store any remaining integer argument regs
4405  // to their spots on the stack so that they may be loaded by dereferencing
4406  // the result of va_next.
4407  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4408  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4409  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4410  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4411  SDValue Store =
4412  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4413  MemOps.push_back(Store);
4414  // Increment the address by four for the next argument to store
4415  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4416  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4417  }
4418  }
4419 
4420  if (!MemOps.empty())
4421  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4422 
4423  return Chain;
4424 }
4425 
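// [Editor's note] The GPR-index recomputation used repeatedly above, in
// isolation: each doubleword of argument-area offset past the linkage area
// corresponds to one of the eight parameter GPRs (r3..r10), and the clamp
// models having run out of registers. Hypothetical helper, illustration only.
#include <algorithm>

static unsigned gprIndexForOffset(unsigned ArgOffset, unsigned LinkageSize) {
  const unsigned PtrByteSize = 8, NumGPRRegs = 8;
  return std::min((ArgOffset - LinkageSize) / PtrByteSize, NumGPRRegs);
}
// e.g. with a 32-byte linkage area, offset 56 maps to GPR index 3 (r6).
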
4426 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4427 /// adjusted to accommodate the arguments for the tailcall.
4428 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4429  unsigned ParamSize) {
4430 
4431  if (!isTailCall) return 0;
4432 
4433  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4434  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4435  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4436  // Remember only if the new adjustment is bigger.
4437  if (SPDiff < FI->getTailCallSPDelta())
4438  FI->setTailCallSPDelta(SPDiff);
4439 
4440  return SPDiff;
4441 }
4442 
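// [Editor's note] The arithmetic of CalculateTailCallSPDiff in isolation:
// a non-negative result means the caller's reserved area already covers the
// tail callee's parameters; a negative result records by how many bytes the
// frame must grow. Values in the example are made up for illustration.
static int tailCallSPDiffSketch(unsigned CallerMinReservedArea,
                                unsigned CalleeParamSize) {
  return (int)CallerMinReservedArea - (int)CalleeParamSize;
}
// e.g. tailCallSPDiffSketch(112, 144) == -32: extend the frame by 32 bytes.
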
4443 static bool isFunctionGlobalAddress(SDValue Callee);
4444 
4445 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4446  const TargetMachine &TM) {
4447  // It does not make sense to call callsShareTOCBase() with a caller that
4448  // is PC Relative since PC Relative callers do not have a TOC.
4449 #ifndef NDEBUG
4450  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4451  assert(!STICaller->isUsingPCRelativeCalls() &&
4452  "PC Relative callers do not have a TOC and cannot share a TOC Base");
4453 #endif
4454 
4455  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4456  // don't have enough information to determine if the caller and callee share
4457  // the same TOC base, so we have to pessimistically assume they don't for
4458  // correctness.
4459  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4460  if (!G)
4461  return false;
4462 
4463  const GlobalValue *GV = G->getGlobal();
4464 
4465  // If the callee is preemptable, then the static linker will use a plt-stub
4466  // which saves the toc to the stack, and needs a nop after the call
4467  // instruction to convert to a toc-restore.
4468  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4469  return false;
4470 
4471  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4472  // We may need a TOC restore in the situation where the caller requires a
4473  // valid TOC but the callee is PC Relative and does not.
4474  const Function *F = dyn_cast<Function>(GV);
4475  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4476 
4477  // If we have an Alias we can try to get the function from there.
4478  if (Alias) {
4479  const GlobalObject *GlobalObj = Alias->getBaseObject();
4480  F = dyn_cast<Function>(GlobalObj);
4481  }
4482 
4483  // If we still have no valid function pointer we do not have enough
4484  // information to determine if the callee uses PC Relative calls so we must
4485  // assume that it does.
4486  if (!F)
4487  return false;
4488 
4489  // If the callee uses PC Relative we cannot guarantee that the callee won't
4490  // clobber the TOC of the caller and so we must assume that the two
4491  // functions do not share a TOC base.
4492  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4493  if (STICallee->isUsingPCRelativeCalls())
4494  return false;
4495 
4496  // If the GV is not a strong definition then we need to assume it can be
4497  // replaced by another function at link time. The function that replaces
4498  // it may not share the same TOC as the caller since the callee may be
4499  // replaced by a PC Relative version of the same function.
4500  if (!GV->isStrongDefinitionForLinker())
4501  return false;
4502 
4503  // The medium and large code models are expected to provide a sufficiently
4504  // large TOC to provide all data addressing needs of a module with a
4505  // single TOC.
4506  if (CodeModel::Medium == TM.getCodeModel() ||
4507  CodeModel::Large == TM.getCodeModel())
4508  return true;
4509 
4510  // Any explicitly-specified sections and section prefixes must also match.
4511  // Also, if we're using -ffunction-sections, then each function is always in
4512  // a different section (the same is true for COMDAT functions).
4513  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4514  GV->getSection() != Caller->getSection())
4515  return false;
4516  if (const auto *F = dyn_cast<Function>(GV)) {
4517  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4518  return false;
4519  }
4520 
4521  return true;
4522 }
4523 
4524 static bool
4525 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4526  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4527  assert(Subtarget.is64BitELFABI());
4528 
4529  const unsigned PtrByteSize = 8;
4530  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531 
4532  static const MCPhysReg GPR[] = {
4533  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535  };
4536  static const MCPhysReg VR[] = {
4537  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539  };
4540 
4541  const unsigned NumGPRs = array_lengthof(GPR);
4542  const unsigned NumFPRs = 13;
4543  const unsigned NumVRs = array_lengthof(VR);
4544  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4545 
4546  unsigned NumBytes = LinkageSize;
4547  unsigned AvailableFPRs = NumFPRs;
4548  unsigned AvailableVRs = NumVRs;
4549 
4550  for (const ISD::OutputArg& Param : Outs) {
4551  if (Param.Flags.isNest()) continue;
4552 
4553  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4554  LinkageSize, ParamAreaSize, NumBytes,
4555  AvailableFPRs, AvailableVRs))
4556  return true;
4557  }
4558  return false;
4559 }
4560 
4561 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4562  if (CB.arg_size() != CallerFn->arg_size())
4563  return false;
4564 
4565  auto CalleeArgIter = CB.arg_begin();
4566  auto CalleeArgEnd = CB.arg_end();
4567  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4568 
4569  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4570  const Value* CalleeArg = *CalleeArgIter;
4571  const Value* CallerArg = &(*CallerArgIter);
4572  if (CalleeArg == CallerArg)
4573  continue;
4574 
4575  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4576  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4577  // }
4578  // 1st argument of callee is undef and has the same type as caller.
4579  if (CalleeArg->getType() == CallerArg->getType() &&
4580  isa<UndefValue>(CalleeArg))
4581  continue;
4582 
4583  return false;
4584  }
4585 
4586  return true;
4587 }
4588 
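// [Editor's note] A sketch of the check above in plain C++ terms: two
// argument lists "match" position-by-position when each callee argument is
// either the very same value the caller received, or an undef of the same
// type. Hypothetical types stand in for llvm::Value; illustration only.
#include <cstddef>
#include <vector>

struct ValueSketch { int Id; bool IsUndef; int TypeId; };

static bool sameArgumentListSketch(const std::vector<ValueSketch> &Caller,
                                   const std::vector<ValueSketch> &Callee) {
  if (Caller.size() != Callee.size())
    return false;
  for (std::size_t I = 0; I != Caller.size(); ++I) {
    if (Callee[I].Id == Caller[I].Id)
      continue; // exactly the same value forwarded through
    if (Callee[I].IsUndef && Callee[I].TypeId == Caller[I].TypeId)
      continue; // undef of the matching type is also acceptable
    return false;
  }
  return true;
}
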
4589 // Returns true if TCO is possible between the callers and callees
4590 // calling conventions.
4591 static bool
4592 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4593  CallingConv::ID CalleeCC) {
4594  // Tail calls are possible with fastcc and ccc.
4595  auto isTailCallableCC = [] (CallingConv::ID CC){
4596  return CC == CallingConv::C || CC == CallingConv::Fast;
4597  };
4598  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4599  return false;
4600 
4601  // We can safely tail call both fastcc and ccc callees from a c calling
4602  // convention caller. If the caller is fastcc, we may have less stack space
4603  // than a non-fastcc caller with the same signature so disable tail-calls in
4604  // that case.
4605  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4606 }
4607 
4608 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4609  SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4610  const SmallVectorImpl<ISD::OutputArg> &Outs,
4611  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4612  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4613 
4614  if (DisableSCO && !TailCallOpt) return false;
4615 
4616  // Variadic argument functions are not supported.
4617  if (isVarArg) return false;
4618 
4619  auto &Caller = DAG.getMachineFunction().getFunction();
4620  // Check that the calling conventions are compatible for tco.
4621  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4622  return false;
4623 
4624  // Callers with any byval parameters are not supported.
4625  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4626  return false;
4627 
4628  // Callees with any byval parameters are not supported either.
4629  // Note: This is a quick work around, because in some cases, e.g.
4630  // caller's stack size > callee's stack size, we are still able to apply
4631  // sibling call optimization. For example, gcc is able to do SCO for caller1
4632  // in the following example, but not for caller2.
4633  // struct test {
4634  // long int a;
4635  // char ary[56];
4636  // } gTest;
4637  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4638  // b->a = v.a;
4639  // return 0;
4640  // }
4641  // void caller1(struct test a, struct test c, struct test *b) {
4642  // callee(gTest, b); }
4643  // void caller2(struct test *b) { callee(gTest, b); }
4644  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4645  return false;
4646 
4647  // If callee and caller use different calling conventions, we cannot pass
4648  // parameters on stack since offsets for the parameter area may be different.
4649  if (Caller.getCallingConv() != CalleeCC &&
4650  needStackSlotPassParameters(Subtarget, Outs))
4651  return false;
4652 
4653  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4654  // the caller and callee share the same TOC for TCO/SCO. If the caller and
4655  // callee potentially have different TOC bases then we cannot tail call since
4656  // we need to restore the TOC pointer after the call.
4657  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4658  // We cannot guarantee this for indirect calls or calls to external functions.
4659  // When PC-Relative addressing is used, the concept of the TOC is no longer
4660  // applicable so this check is not required.
4661  // Check first for indirect calls.
4662  if (!Subtarget.isUsingPCRelativeCalls() &&
4663  !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4664  return false;
4665 
4666  // Check if we share the TOC base.
4667  if (!Subtarget.isUsingPCRelativeCalls() &&
4668  !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4669  return false;
4670 
4671  // TCO allows altering callee ABI, so we don't have to check further.
4672  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4673  return true;
4674 
4675  if (DisableSCO) return false;
4676 
4677  // If the callee uses the same argument list as the caller, we can apply
4678  // SCO in this case. If not, we need to check whether the callee needs
4679  // stack slots for passing arguments.
4680  // PC Relative tail calls may not have a CallBase.
4681  // If there is no CallBase we cannot verify if we have the same argument
4682  // list so assume that we don't have the same argument list.
4683  if (CB && !hasSameArgumentList(&Caller, *CB) &&
4684  needStackSlotPassParameters(Subtarget, Outs))
4685  return false;
4686  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4687  return false;
4688 
4689  return true;
4690 }
4691 
4692 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4693 /// for tail call optimization. Targets which want to do tail call
4694 /// optimization should implement this function.
4695 bool
4696 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4697  CallingConv::ID CalleeCC,
4698  bool isVarArg,
4699  const SmallVectorImpl<ISD::InputArg> &Ins,
4700  SelectionDAG& DAG) const {
4701  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4702  return false;
4703 
4704  // Variable argument functions are not supported.
4705  if (isVarArg)
4706  return false;
4707 
4708  MachineFunction &MF = DAG.getMachineFunction();
4709  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4710  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4711  // Functions containing by val parameters are not supported.
4712  for (unsigned i = 0; i != Ins.size(); i++) {
4713  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4714  if (Flags.isByVal()) return false;
4715  }
4716 
4717  // Non-PIC/GOT tail calls are supported.
4718  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4719  return true;
4720 
4721  // At the moment we can only do local tail calls (in same module, hidden
4722  // or protected) if we are generating PIC.
4723  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4724  return G->getGlobal()->hasHiddenVisibility()
4725  || G->getGlobal()->hasProtectedVisibility();
4726  }
4727 
4728  return false;
4729 }
4730 
4731 /// isBLACompatibleAddress - Return the immediate to use if the specified
4732 /// 32-bit value is representable in the immediate field of a BxA instruction.
4733 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4734  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4735  if (!C) return nullptr;
4736 
4737  int Addr = C->getZExtValue();
4738  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4739  SignExtend32<26>(Addr) != Addr)
4740  return nullptr; // Top 6 bits have to be sext of immediate.
4741 
4742  return DAG
4743  .getConstant(
4744  (int)C->getZExtValue() >> 2, SDLoc(Op),
4745  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4746  .getNode();
4747 }
4748 
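// [Editor's note] A standalone restatement of the test above: a BLA absolute
// branch target must be word-aligned and must survive sign extension from a
// 26-bit immediate (its low two bits are implicit zeros). SignExtend32<26> is
// re-derived locally here instead of using llvm/Support/MathExtras.h;
// illustration only.
#include <cstdint>

static bool isBLAEncodableSketch(int32_t Addr) {
  if (Addr & 3)
    return false; // low 2 bits must be zero
  int32_t SExt = int32_t(uint32_t(Addr) << 6) >> 6; // sign-extend from bit 25
  return SExt == Addr; // the top 6 bits must just be sign copies
}
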
4749 namespace {
4750 
4751 struct TailCallArgumentInfo {
4752  SDValue Arg;
4753  SDValue FrameIdxOp;
4754  int FrameIdx = 0;
4755 
4756  TailCallArgumentInfo() = default;
4757 };
4758 
4759 } // end anonymous namespace
4760 
4761 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4762 static void StoreTailCallArgumentsToStackSlot(
4763  SelectionDAG &DAG, SDValue Chain,
4764  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4765  SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4766  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4767  SDValue Arg = TailCallArgs[i].Arg;
4768  SDValue FIN = TailCallArgs[i].FrameIdxOp;
4769  int FI = TailCallArgs[i].FrameIdx;
4770  // Store relative to framepointer.
4771  MemOpChains.push_back(DAG.getStore(
4772  Chain, dl, Arg, FIN,
4773  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4774  }
4775 }
4776 
4777 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4778 /// the appropriate stack slot for the tail call optimized function call.
4779 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4780  SDValue OldRetAddr, SDValue OldFP,
4781  int SPDiff, const SDLoc &dl) {
4782  if (SPDiff) {
4783  // Calculate the new stack slot for the return address.
4784  MachineFunction &MF = DAG.getMachineFunction();
4785  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4786  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4787  bool isPPC64 = Subtarget.isPPC64();
4788  int SlotSize = isPPC64 ? 8 : 4;
4789  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4790  int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4791  NewRetAddrLoc, true);
4792  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4793  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4794  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4795  MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4796  }
4797  return Chain;
4798 }
4799 
4800 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4801 /// the position of the argument.
4802 static void
4803 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804  SDValue Arg, int SPDiff, unsigned ArgOffset,
4805  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806  int Offset = ArgOffset + SPDiff;
4807  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810  SDValue FIN = DAG.getFrameIndex(FI, VT);
4811  TailCallArgumentInfo Info;
4812  Info.Arg = Arg;
4813  Info.FrameIdxOp = FIN;
4814  Info.FrameIdx = FI;
4815  TailCallArguments.push_back(Info);
4816 }
4817 
4818 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4819 /// stack slot. Returns the chain as result and the loaded frame pointers in
4820 /// LROpOut/FPOpout. Used when tail calling.
4821 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823  SDValue &FPOpOut, const SDLoc &dl) const {
4824  if (SPDiff) {
4825  // Load the LR and FP stack slot for later adjusting.
4826  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827  LROpOut = getReturnAddrFrameIndex(DAG);
4828  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829  Chain = SDValue(LROpOut.getNode(), 1);
4830  }
4831  return Chain;
4832 }
4833 
4834 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4835 /// by "Src" to address "Dst" of size "Size". Alignment information is
4836 /// specified by the specific parameter attribute. The copy will be passed as
4837 /// a byval function parameter.
4838 /// Sometimes what we are copying is the end of a larger object, the part that
4839 /// does not fit in registers.
4840 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4841  SDValue Chain, ISD::ArgFlagsTy Flags,
4842  SelectionDAG &DAG, const SDLoc &dl) {
4843  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4844  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4845  Flags.getNonZeroByValAlign(), false, false, false,
4846  MachinePointerInfo(), MachinePointerInfo());
4847 }
4848 
4849 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4850 /// tail calls.
4851 static void LowerMemOpCallTo(
4852  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4853  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4854  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4855  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4856  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4857  if (!isTailCall) {
4858  if (isVector) {
4859  SDValue StackPtr;
4860  if (isPPC64)
4861  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4862  else
4863  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4864  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4865  DAG.getConstant(ArgOffset, dl, PtrVT));
4866  }
4867  MemOpChains.push_back(
4868  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4869  // Calculate and remember argument location.
4870  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4871  TailCallArguments);
4872 }
4873 
4874 static void
4875 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4876  const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4877  SDValue FPOp,
4878  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4879  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4880  // might overwrite each other in case of tail call optimization.
4881  SmallVector<SDValue, 8> MemOpChains2;
4882  // Do not flag preceding copytoreg stuff together with the following stuff.
4883  InFlag = SDValue();
4884  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4885  MemOpChains2, dl);
4886  if (!MemOpChains2.empty())
4887  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4888 
4889  // Store the return address to the appropriate stack slot.
4890  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4891 
4892  // Emit callseq_end just before tailcall node.
4893  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4894  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4895  InFlag = Chain.getValue(1);
4896 }
4897 
4898 // Is this global address that of a function that can be called by name? (as
4899 // opposed to something that must hold a descriptor for an indirect call).
4900 static bool isFunctionGlobalAddress(SDValue Callee) {
4901  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4902  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4903  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4904  return false;
4905 
4906  return G->getGlobal()->getValueType()->isFunctionTy();
4907  }
4908 
4909  return false;
4910 }
4911 
4912 SDValue PPCTargetLowering::LowerCallResult(
4913  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4914  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4915  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4916  SmallVector<CCValAssign, 16> RVLocs;
4917  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4918  *DAG.getContext());
4919 
4920  CCRetInfo.AnalyzeCallResult(
4921  Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
4922  ? RetCC_PPC_Cold
4923  : RetCC_PPC);
4924 
4925  // Copy all of the result registers out of their specified physreg.
4926  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4927  CCValAssign &VA = RVLocs[i];
4928  assert(VA.isRegLoc() && "Can only return in registers!");
4929 
4930  SDValue Val;
4931 
4932  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
4933  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4934  InFlag);
4935  Chain = Lo.getValue(1);
4936  InFlag = Lo.getValue(2);
4937  VA = RVLocs[++i]; // skip ahead to next loc
4938  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4939  InFlag);
4940  Chain = Hi.getValue(1);
4941  InFlag = Hi.getValue(2);
4942  if (!Subtarget.isLittleEndian())
4943  std::swap (Lo, Hi);
4944  Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
4945  } else {
4946  Val = DAG.getCopyFromReg(Chain, dl,
4947  VA.getLocReg(), VA.getLocVT(), InFlag);
4948  Chain = Val.getValue(1);
4949  InFlag = Val.getValue(2);
4950  }
4951 
4952  switch (VA.getLocInfo()) {
4953  default: llvm_unreachable("Unknown loc info!");
4954  case CCValAssign::Full: break;
4955  case CCValAssign::AExt:
4956  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4957  break;
4958  case CCValAssign::ZExt:
4959  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4960  DAG.getValueType(VA.getValVT()));
4961  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4962  break;
4963  case CCValAssign::SExt:
4964  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4965  DAG.getValueType(VA.getValVT()));
4966  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4967  break;
4968  }
4969 
4970  InVals.push_back(Val);
4971  }
4972 
4973  return Chain;
4974 }
4975 
4976 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
4977  const PPCSubtarget &Subtarget, bool isPatchPoint) {
4978  // PatchPoint calls are not indirect.
4979  if (isPatchPoint)
4980  return false;
4981 
4982  if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
4983  return false;
4984 
4985  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
4986  // because the immediate function pointer points to a descriptor instead of
4987  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
4988  // pointer immediate points to the global entry point, while the BLA would
4989  // need to jump to the local entry point (see rL211174).
4990  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
4991  isBLACompatibleAddress(Callee, DAG))
4992  return false;
4993 
4994  return true;
4995 }
4996 
4997 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
4998 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
4999  return Subtarget.isAIXABI() ||
5000  (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5001 }
5002 
5003 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5004  const Function &Caller,
5005  const SDValue &Callee,
5006  const PPCSubtarget &Subtarget,
5007  const TargetMachine &TM) {
5008  if (CFlags.IsTailCall)
5009  return PPCISD::TC_RETURN;
5010 
5011  // This is a call through a function pointer.
5012  if (CFlags.IsIndirect) {
5013  // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5014  // indirect calls. The save of the caller's TOC pointer to the stack will be
5015  // inserted into the DAG as part of call lowering. The restore of the TOC
5016  // pointer is modeled by using a pseudo instruction for the call opcode that
5017  // represents the 2 instruction sequence of an indirect branch and link,
5018  // immediately followed by a load of the TOC pointer from the stack save
5019  // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5020  // as it is not saved or used.
5021  return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5022  : PPCISD::BCTRL;
5023  }
5024 
5025  if (Subtarget.isUsingPCRelativeCalls()) {
5026  assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5027  return PPCISD::CALL_NOTOC;
5028  }
5029 
5030  // The ABIs that maintain a TOC pointer across calls need to have a nop
5031  // immediately following the call instruction if the caller and callee may
5032  // have different TOC bases. At link time if the linker determines the calls
5033  // may not share a TOC base, the call is redirected to a trampoline inserted
5034  // by the linker. The trampoline will (among other things) save the caller's
5035  // TOC pointer at an ABI designated offset in the linkage area and the linker
5036  // will rewrite the nop to be a load of the TOC pointer from the linkage area
5037  // into gpr2.
5038  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5039  return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5040  : PPCISD::CALL_NOP;
5041 
5042  return PPCISD::CALL;
5043 }
5044 
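// [Editor's note] The decision structure of getCallOpcode, flattened into a
// plain function over booleans with an illustrative enum standing in for the
// PPCISD opcodes. This is a reading aid under the stated assumptions, not the
// actual lowering code.
enum class CallKind { TCRet, BctrlLoadTOC, Bctrl, CallNoTOC, CallNop, Call };

static CallKind pickCallKind(bool IsTailCall, bool IsIndirect,
                             bool NeedsTOCSaveRestore, bool UsesPCRelCalls,
                             bool IsTOCABI, bool SharesTOCBase) {
  if (IsTailCall)
    return CallKind::TCRet;
  if (IsIndirect) // restore the TOC after the call only where the ABI needs it
    return NeedsTOCSaveRestore ? CallKind::BctrlLoadTOC : CallKind::Bctrl;
  if (UsesPCRelCalls)
    return CallKind::CallNoTOC;
  if (IsTOCABI) // leave a nop for the linker unless the TOC base is shared
    return SharesTOCBase ? CallKind::Call : CallKind::CallNop;
  return CallKind::Call;
}
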
5045 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5046  const SDLoc &dl, const PPCSubtarget &Subtarget) {
5047  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5048  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5049  return SDValue(Dest, 0);
5050 
5051  // Returns true if the callee is local, and false otherwise.
5052  auto isLocalCallee = [&]() {
5053  const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5054  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5055  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5056 
5057  return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5058  !dyn_cast_or_null<GlobalIFunc>(GV);
5059  };
5060 
5061  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5062  // a static relocation model causes some versions of GNU LD (2.17.50, at
5063  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5064  // built with secure-PLT.
5065  bool UsePlt =
5066  Subtarget.is32BitELFABI() && !isLocalCallee() &&
5067  Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5068 
5069  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5070  const TargetMachine &TM = Subtarget.getTargetMachine();
5071  const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5072  MCSymbolXCOFF *S =
5073  cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5074 
5075  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5076  return DAG.getMCSymbol(S, PtrVT);
5077  };
5078 
5079  if (isFunctionGlobalAddress(Callee)) {
5080  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5081 
5082  if (Subtarget.isAIXABI()) {
5083  assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5084  return getAIXFuncEntryPointSymbolSDNode(GV);
5085  }
5086  return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5087  UsePlt ? PPCII::MO_PLT : 0);
5088  }
5089 
5090  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5091  const char *SymName = S->getSymbol();
5092  if (Subtarget.isAIXABI()) {
5093  // If there exists a user-declared function whose name is the same as the
5094  // ExternalSymbol's, then we pick up the user-declared version.
5095  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5096  if (const Function *F =
5097  dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5098  return getAIXFuncEntryPointSymbolSDNode(F);
5099 
5100  // On AIX, direct function calls reference the symbol for the function's
5101  // entry point, which is named by prepending a "." before the function's
5102  // C-linkage name. A Qualname is returned here because an external
5103  // function entry point is a csect with XTY_ER property.
5104  const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5105  auto &Context = DAG.getMachineFunction().getMMI().getContext();
5106  MCSectionXCOFF *Sec = Context.getXCOFFSection(
5107  (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5108  SectionKind::getMetadata());
5109  return Sec->getQualNameSymbol();
5110  };
5111 
5112  SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5113  }
5114  return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5115  UsePlt ? PPCII::MO_PLT : 0);
5116  }
5117 
5118  // No transformation needed.
5119  assert(Callee.getNode() && "What no callee?");
5120  return Callee;
5121 }
5122 
5123 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5124  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5125  "Expected a CALLSEQ_STARTSDNode.");
5126 
5127  // The last operand is the chain, except when the node has glue. If the node
5128  // has glue, then the last operand is the glue, and the chain is the second
5129  // last operand.
5130  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5131  if (LastValue.getValueType() != MVT::Glue)
5132  return LastValue;
5133 
5134  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5135 }
5136 
5137 // Creates the node that moves a function's address into the count register
5138 // to prepare for an indirect call instruction.
5139 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5140  SDValue &Glue, SDValue &Chain,
5141  const SDLoc &dl) {
5142  SDValue MTCTROps[] = {Chain, Callee, Glue};
5143  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5144  Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5145  makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5146  // The glue is the second value produced.
5147  Glue = Chain.getValue(1);
5148 }
5149 
5150 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5151  SDValue &Glue, SDValue &Chain,
5152  SDValue CallSeqStart,
5153  const CallBase *CB, const SDLoc &dl,
5154  bool hasNest,
5155  const PPCSubtarget &Subtarget) {
5156  // Function pointers in the 64-bit SVR4 ABI do not point to the function
5157  // entry point, but to the function descriptor (the function entry point
5158  // address is part of the function descriptor though).
5159  // The function descriptor is a three doubleword structure with the
5160  // following fields: function entry point, TOC base address and
5161  // environment pointer.
5162  // Thus for a call through a function pointer, the following actions need
5163  // to be performed:
5164  // 1. Save the TOC of the caller in the TOC save area of its stack
5165  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5166  // 2. Load the address of the function entry point from the function
5167  // descriptor.
5168  // 3. Load the TOC of the callee from the function descriptor into r2.
5169  // 4. Load the environment pointer from the function descriptor into
5170  // r11.
5171  // 5. Branch to the function entry point address.
5172  // 6. On return of the callee, the TOC of the caller needs to be
5173  // restored (this is done in FinishCall()).
5174  //
5175  // The loads are scheduled at the beginning of the call sequence, and the
5176  // register copies are flagged together to ensure that no other
5177  // operations can be scheduled in between. E.g. without flagging the
5178  // copies together, a TOC access in the caller could be scheduled between
5179  // the assignment of the callee TOC and the branch to the callee, which leads
5180  // to incorrect code.
5181 
5182  // Start by loading the function address from the descriptor.
5183  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5184  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5185  ? (MachineMemOperand::MODereferenceable |
5186  MachineMemOperand::MOInvariant)
5187  : MachineMemOperand::MONone;
5188 
5189  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5190 
5191  // Registers used in building the DAG.
5192  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5193  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5194 
5195  // Offsets of descriptor members.
5196  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5197  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5198 
5199  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5200  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5201 
5202  // One load for the function's entry point address.
5203  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5204  Alignment, MMOFlags);
5205 
5206  // One for loading the TOC anchor for the module that contains the called
5207  // function.
5208  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5209  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5210  SDValue TOCPtr =
5211  DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5212  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5213 
5214  // One for loading the environment pointer.
5215  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5216  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5217  SDValue LoadEnvPtr =
5218  DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5219  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5220 
5221 
5222  // Then copy the newly loaded TOC anchor to the TOC pointer.
5223  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5224  Chain = TOCVal.getValue(0);
5225  Glue = TOCVal.getValue(1);
5226 
5227  // If the function call has an explicit 'nest' parameter, it takes the
5228  // place of the environment pointer.
5229  assert((!hasNest || !Subtarget.isAIXABI()) &&
5230  "Nest parameter is not supported on AIX.");
5231  if (!hasNest) {
5232  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5233  Chain = EnvVal.getValue(0);
5234  Glue = EnvVal.getValue(1);
5235  }
5236 
5237  // The rest of the indirect call sequence is the same as the non-descriptor
5238  // DAG.
5239  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5240 }
5241 
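// Builds the operand list for the call node emitted in FinishCall. Reading
// the code below, the resulting order is roughly: chain; callee (direct) or
// [TOC-restore address, environment-pointer register, CTR (tail calls)]
// (indirect); SPDiff (tail calls); argument registers; TOC register
// (TOC-based ABIs); CR1EQ (32-bit SVR4 vararg); register mask; glue.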
5242 static void
5243 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5244  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5245  SelectionDAG &DAG,
5246  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5247  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5248  const PPCSubtarget &Subtarget) {
5249  const bool IsPPC64 = Subtarget.isPPC64();
5250  // MVT for a general purpose register.
5251  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5252 
5253  // First operand is always the chain.
5254  Ops.push_back(Chain);
5255 
5256  // If it's a direct call pass the callee as the second operand.
5257  if (!CFlags.IsIndirect)
5258  Ops.push_back(Callee);
5259  else {
5260  assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5261 
5262  // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5263  // on the stack (this would have been done in `LowerCall_64SVR4` or
5264  // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5265  // represents both the indirect branch and a load that restores the TOC
5266  // pointer from the linkage area. The operand for the TOC restore is an add
5267  // of the TOC save offset to the stack pointer. This must be the second
5268  // operand: after the chain input but before any other variadic arguments.
5269  // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5270  // saved or used.
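  // For example, with the ELFv2 TOC save slot at SP + 24 this operand is
  // X1 + 24, and the pseudo expands to a `ld r2, 24(r1)` after the `bctrl`.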
5271  if (isTOCSaveRestoreRequired(Subtarget)) {
5272  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5273 
5274  SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5275  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5276  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5277  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5278  Ops.push_back(AddTOC);
5279  }
5280 
5281  // Add the register used for the environment pointer.
5282  if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5283  Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5284  RegVT));
5285 
5286 
5287  // Add CTR register as callee so a bctr can be emitted later.
5288  if (CFlags.IsTailCall)
5289  Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5290  }
5291 
5292  // If this is a tail call add stack pointer delta.
5293  if (CFlags.IsTailCall)
5294  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5295 
5296  // Add argument registers to the end of the list so that they are known live
5297  // into the call.
5298  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5299  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5300  RegsToPass[i].second.getValueType()));
5301 
5302  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5303  // no way to mark dependencies as implicit here.
5304  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5305  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5306  !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5307  Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5308 
5309  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5310  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5311  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5312 
5313  // Add a register mask operand representing the call-preserved registers.
5314  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5315  const uint32_t *Mask =
5316  TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5317  assert(Mask && "Missing call preserved mask for calling convention");
5318  Ops.push_back(DAG.getRegisterMask(Mask));
5319 
5320  // If the glue is valid, it is the last operand.
5321  if (Glue.getNode())
5322  Ops.push_back(Glue);
5323 }
5324 
5325 SDValue PPCTargetLowering::FinishCall(
5326  CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5327  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5328  SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5329  unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5330  SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5331 
5332  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5333  Subtarget.isAIXABI())
5334  setUsesTOCBasePtr(DAG);
5335 
5336  unsigned CallOpc =
5337  getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5338  Subtarget, DAG.getTarget());
5339 
5340  if (!CFlags.IsIndirect)
5341  Callee = transformCallee(Callee, DAG, dl, Subtarget);
5342  else if (Subtarget.usesFunctionDescriptors())
5343  prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5344  dl, CFlags.HasNest, Subtarget);
5345  else
5346  prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5347 
5348  // Build the operand list for the call instruction.
5349  SmallVector<SDValue, 8> Ops;
5350  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5351  SPDiff, Subtarget);
5352 
5353  // Emit tail call.
5354  if (CFlags.IsTailCall) {
5355  // Indirect tail calls when using PC Relative calls do not have the same
5356  // constraints.
5357  assert(((Callee.getOpcode() == ISD::Register &&
5358  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5359  Callee.getOpcode() == ISD::TargetExternalSymbol ||
5360  Callee.getOpcode() == ISD::TargetGlobalAddress ||
5361  isa<ConstantSDNode>(Callee) ||
5362  (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5363  "Expecting a global address, external symbol, absolute value, "
5364  "register or an indirect tail call when PC Relative calls are "
5365  "used.");
5366  // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5367  assert(CallOpc == PPCISD::TC_RETURN &&
5368  "Unexpected call opcode for a tail call.");
5369  DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5370  return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5371  }
5372 
5373  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5374  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5375  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5376  Glue = Chain.getValue(1);
5377 
5378  // When performing tail call optimization the callee pops its arguments off
5379  // the stack. Account for this here so these bytes can be pushed back on in
5380  // PPCFrameLowering::eliminateCallFramePseudoInstr.
5381  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5382  getTargetMachine().Options.GuaranteedTailCallOpt)
5383  ? NumBytes
5384  : 0;
5385 
5386  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5387  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5388  Glue, dl);
5389  Glue = Chain.getValue(1);
5390 
5391  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5392  DAG, InVals);
5393 }
5394 
5395 SDValue
5396 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5397  SmallVectorImpl<SDValue> &InVals) const {
5398  SelectionDAG &DAG = CLI.DAG;
5399  SDLoc &dl = CLI.DL;
5401  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5403  SDValue Chain = CLI.Chain;
5404  SDValue Callee = CLI.Callee;
5405  bool &isTailCall = CLI.IsTailCall;
5406  CallingConv::ID CallConv = CLI.CallConv;
5407  bool isVarArg = CLI.IsVarArg;
5408  bool isPatchPoint = CLI.IsPatchPoint;
5409  const CallBase *CB = CLI.CB;
5410 
5411  if (isTailCall) {
5412  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5413  isTailCall = false;
5414  else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5415  isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5416  Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5417  else
5418  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5419  Ins, DAG);
5420  if (isTailCall) {
5421  ++NumTailCalls;
5422  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5423  ++NumSiblingCalls;
5424 
5425  // PC Relative calls no longer guarantee that the callee is a Global
5426  // Address Node. The callee could be an indirect tail call in which
5427  // case the SDValue for the callee could be a load (to load the address
5428  // of a function pointer) or it may be a register copy (to move the
5429  // address of the callee from a function parameter into a virtual
5430  // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5431  assert((Subtarget.isUsingPCRelativeCalls() ||
5432  isa<GlobalAddressSDNode>(Callee)) &&
5433  "Callee should be an llvm::Function object.");
5434 
5435  LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5436  << "\nTCO callee: ");
5437  LLVM_DEBUG(Callee.dump());
5438  }
5439  }
5440 
5441  if (!isTailCall && CB && CB->isMustTailCall())
5442  report_fatal_error("failed to perform tail call elimination on a call "
5443  "site marked musttail");
5444 
5445  // When long calls (i.e. indirect calls) are always used, calls are always
5446  // made via function pointer. If we have a function name, first translate it
5447  // into a pointer.
5448  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5449  !isTailCall)
5450  Callee = LowerGlobalAddress(Callee, DAG);
5451 
5452  CallFlags CFlags(
5453  CallConv, isTailCall, isVarArg, isPatchPoint,
5454  isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5455  // hasNest
5456  Subtarget.is64BitELFABI() &&
5457  any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5458  CLI.NoMerge);
5459 
5460  if (Subtarget.isAIXABI())
5461  return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5462  InVals, CB);
5463 
5464  assert(Subtarget.isSVR4ABI());
5465  if (Subtarget.isPPC64())
5466  return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5467  InVals, CB);
5468  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5469  InVals, CB);
5470 }
5471 
5472 SDValue PPCTargetLowering::LowerCall_32SVR4(
5473  SDValue Chain, SDValue Callee, CallFlags CFlags,
5474  const SmallVectorImpl<ISD::OutputArg> &Outs,
5475  const SmallVectorImpl<SDValue> &OutVals,
5476  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5477  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5478  const CallBase *CB) const {
5479  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5480  // of the 32-bit SVR4 ABI stack frame layout.
5481 
5482  const CallingConv::ID CallConv = CFlags.CallConv;
5483  const bool IsVarArg = CFlags.IsVarArg;
5484  const bool IsTailCall = CFlags.IsTailCall;
5485 
5486  assert((CallConv == CallingConv::C ||
5487  CallConv == CallingConv::Cold ||
5488  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5489 
5490  const Align PtrAlign(4);
5491 
5492  MachineFunction &MF = DAG.getMachineFunction();
5493 
5494  // Mark this function as potentially containing a tail call. As a
5495  // consequence the frame pointer will be used for dynamic alloca and for
5496  // restoring the caller's stack pointer in this function's epilog. This is
5497  // done because the tail-called function might overwrite the value in this
5498  // function's (MF) stack pointer stack slot 0(SP).
5499  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5500  CallConv == CallingConv::Fast)
5501  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5502 
5503  // Count how many bytes are to be pushed on the stack, including the linkage
5504  // area, parameter list area and the part of the local variable space which
5505  // contains copies of aggregates which are passed by value.
5506 
5507  // Assign locations to all of the outgoing arguments.
5508  SmallVector<CCValAssign, 16> ArgLocs;
5509  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5510 
5511  // Reserve space for the linkage area on the stack.
5512  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5513  PtrAlign);
5514  if (useSoftFloat())
5515  CCInfo.PreAnalyzeCallOperands(Outs);
5516 
5517  if (IsVarArg) {
5518  // Handle fixed and variable vector arguments differently.
5519  // Fixed vector arguments go into registers as long as registers are
5520  // available. Variable vector arguments always go into memory.
5521  unsigned NumArgs = Outs.size();
5522 
5523  for (unsigned i = 0; i != NumArgs; ++i) {
5524  MVT ArgVT = Outs[i].VT;
5525  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5526  bool Result;
5527 
5528  if (Outs[i].IsFixed) {
5529  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5530  CCInfo);
5531  } else {
5532  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5533  ArgFlags, CCInfo);
5534  }
5535 
5536  if (Result) {
5537 #ifndef NDEBUG
5538  errs() << "Call operand #" << i << " has unhandled type "
5539  << EVT(ArgVT).getEVTString() << "\n";
5540 #endif
5541  llvm_unreachable(nullptr);
5542  }
5543  }
5544  } else {
5545  // All arguments are treated the same.
5546  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5547  }
5548  CCInfo.clearWasPPCF128();
5549 
5550  // Assign locations to all of the outgoing aggregate by value arguments.
5551  SmallVector<CCValAssign, 16> ByValArgLocs;
5552  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5553 
5554  // Reserve stack space for the allocations in CCInfo.
5555  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5556 
5557  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5558 
5559  // Size of the linkage area, parameter list area and the part of the local
5560  // space variable where copies of aggregates which are passed by value are
5561  // stored.
5562  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5563 
5564  // Calculate by how many bytes the stack has to be adjusted in case of tail
5565  // call optimization.
5566  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5567 
5568  // Adjust the stack pointer for the new arguments...
5569  // These operations are automatically eliminated by the prolog/epilog pass
5570  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5571  SDValue CallSeqStart = Chain;
5572 
5573  // Load the return address and frame pointer so they can be moved somewhere
5574  // else later.
5575  SDValue LROp, FPOp;
5576  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5577 
5578  // Set up a copy of the stack pointer for use loading and storing any
5579  // arguments that may not fit in the registers available for argument
5580  // passing.
5581  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5582 
5584  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5585  SmallVector<SDValue, 8> MemOpChains;
5586 
5587  bool seenFloatArg = false;
5588  // Walk the register/memloc assignments, inserting copies/loads.
5589  // i - Tracks the index into the list of registers allocated for the call
5590  // RealArgIdx - Tracks the index into the list of actual function arguments
5591  // j - Tracks the index into the list of byval arguments
5592  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5593  i != e;
5594  ++i, ++RealArgIdx) {
5595  CCValAssign &VA = ArgLocs[i];
5596  SDValue Arg = OutVals[RealArgIdx];
5597  ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5598 
5599  if (Flags.isByVal()) {
5600  // Argument is an aggregate which is passed by value, thus we need to
5601  // create a copy of it in the local variable space of the current stack
5602  // frame (which is the stack frame of the caller) and pass the address of
5603  // this copy to the callee.
5604  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5605  CCValAssign &ByValVA = ByValArgLocs[j++];
5606  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5607 
5608  // Memory reserved in the local variable space of the caller's stack frame.
5609  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5610 
5611  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5612  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5613  StackPtr, PtrOff);
5614 
5615  // Create a copy of the argument in the local area of the current
5616  // stack frame.
5617  SDValue MemcpyCall =
5618  CreateCopyOfByValArgument(Arg, PtrOff,
5619  CallSeqStart.getNode()->getOperand(0),
5620  Flags, DAG, dl);
5621 
5622  // This must go outside the CALLSEQ_START..END.
5623  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5624  SDLoc(MemcpyCall));
5625  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5626  NewCallSeqStart.getNode());
5627  Chain = CallSeqStart = NewCallSeqStart;
5628 
5629  // Pass the address of the aggregate copy on the stack either in a
5630  // physical register or in the parameter list area of the current stack
5631  // frame to the callee.
5632  Arg = PtrOff;
5633  }
5634 
5635  // When useCRBits() is true, there can be i1 arguments.
5636  // It is because getRegisterType(MVT::i1) => MVT::i1,
5637  // and for other integer types getRegisterType() => MVT::i32.
5638  // Extend i1 and ensure callee will get i32.
5639  if (Arg.getValueType() == MVT::i1)
5640  Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5641  dl, MVT::i32, Arg);
5642 
5643  if (VA.isRegLoc()) {
5644  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5645  // Put argument in a physical register.
5646  if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5647  bool IsLE = Subtarget.isLittleEndian();
5648  SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5649  DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5650  RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5651  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5652  DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5653  RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5654  SVal.getValue(0)));
5655  } else
5656  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5657  } else {
5658  // Put argument in the parameter list area of the current stack frame.
5659  assert(VA.isMemLoc());
5660  unsigned LocMemOffset = VA.getLocMemOffset();
5661 
5662  if (!IsTailCall) {
5663  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5664  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5665  StackPtr, PtrOff);
5666 
5667  MemOpChains.push_back(
5668  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5669  } else {
5670  // Calculate and remember argument location.
5671  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5672  TailCallArguments);
5673  }
5674  }
5675  }
5676 
5677  if (!MemOpChains.empty())
5678  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5679 
5680  // Build a sequence of copy-to-reg nodes chained together with token chain
5681  // and flag operands which copy the outgoing args into the appropriate regs.
5682  SDValue InFlag;
5683  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5684  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5685  RegsToPass[i].second, InFlag);
5686  InFlag = Chain.getValue(1);
5687  }
5688 
5689  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5690  // registers.
5691  if (IsVarArg) {
5692  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5693  SDValue Ops[] = { Chain, InFlag };
5694 
5695  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5696  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5697 
5698  InFlag = Chain.getValue(1);
5699  }
5700 
5701  if (IsTailCall)
5702  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5703  TailCallArguments);
5704 
5705  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5706  Callee, SPDiff, NumBytes, Ins, InVals, CB);
5707 }
5708 
5709 // Copy an argument into memory, being careful to do this outside the
5710 // call sequence for the call to which the argument belongs.
5711 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5712  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5713  SelectionDAG &DAG, const SDLoc &dl) const {
5714  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5715  CallSeqStart.getNode()->getOperand(0),
5716  Flags, DAG, dl);
5717  // The MEMCPY must go outside the CALLSEQ_START..END.
5718  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5719  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5720  SDLoc(MemcpyCall));
5721  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5722  NewCallSeqStart.getNode());
5723  return NewCallSeqStart;
5724 }
5725 
5726 SDValue PPCTargetLowering::LowerCall_64SVR4(
5727  SDValue Chain, SDValue Callee, CallFlags CFlags,
5728  const SmallVectorImpl<ISD::OutputArg> &Outs,
5729  const SmallVectorImpl<SDValue> &OutVals,
5730  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5731  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5732  const CallBase *CB) const {
5733  bool isELFv2ABI = Subtarget.isELFv2ABI();
5734  bool isLittleEndian = Subtarget.isLittleEndian();
5735  unsigned NumOps = Outs.size();
5736  bool IsSibCall = false;
5737  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5738 
5739  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5740  unsigned PtrByteSize = 8;
5741 
5742  MachineFunction &MF = DAG.getMachineFunction();
5743 
5744  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5745  IsSibCall = true;
5746 
5747  // Mark this function as potentially containing a tail call. As a
5748  // consequence the frame pointer will be used for dynamic alloca and for
5749  // restoring the caller's stack pointer in this function's epilog. This is
5750  // done because the tail-called function might overwrite the value in this
5751  // function's (MF) stack pointer stack slot 0(SP).
5752  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5753  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5754 
5755  assert(!(IsFastCall && CFlags.IsVarArg) &&
5756  "fastcc not supported on varargs functions");
5757 
5758  // Count how many bytes are to be pushed on the stack, including the linkage
5759  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5760  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5761  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5762  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5763  unsigned NumBytes = LinkageSize;
5764  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5765 
5766  static const MCPhysReg GPR[] = {
5767  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5768  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5769  };
5770  static const MCPhysReg VR[] = {
5771  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5772  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5773  };
5774 
5775  const unsigned NumGPRs = array_lengthof(GPR);
5776  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5777  const unsigned NumVRs = array_lengthof(VR);
5778 
5779  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5780  // can be passed to the callee in registers.
5781  // For the fast calling convention, there is another check below.
5782  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5783  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5784  if (!HasParameterArea) {
5785  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5786  unsigned AvailableFPRs = NumFPRs;
5787  unsigned AvailableVRs = NumVRs;
5788  unsigned NumBytesTmp = NumBytes;
5789  for (unsigned i = 0; i != NumOps; ++i) {
5790  if (Outs[i].Flags.isNest()) continue;
5791  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5792  PtrByteSize, LinkageSize, ParamAreaSize,
5793  NumBytesTmp, AvailableFPRs, AvailableVRs))
5794  HasParameterArea = true;
5795  }
5796  }
5797 
5798  // When using the fast calling convention, we don't provide backing for
5799  // arguments that will be in registers.
5800  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5801 
5802  // Avoid allocating parameter area for fastcc functions if all the arguments
5803  // can be passed in the registers.
5804  if (IsFastCall)
5805  HasParameterArea = false;
5806 
5807  // Add up all the space actually used.
5808  for (unsigned i = 0; i != NumOps; ++i) {
5809  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5810  EVT ArgVT = Outs[i].VT;
5811  EVT OrigVT = Outs[i].ArgVT;
5812 
5813  if (Flags.isNest())
5814  continue;
5815 
5816  if (IsFastCall) {
5817  if (Flags.isByVal()) {
5818  NumGPRsUsed += (Flags.getByValSize()+7)/8;
5819  if (NumGPRsUsed > NumGPRs)
5820  HasParameterArea = true;
5821  } else {
5822  switch (ArgVT.getSimpleVT().SimpleTy) {
5823  default: llvm_unreachable("Unexpected ValueType for argument!");
5824  case MVT::i1:
5825  case MVT::i32:
5826  case MVT::i64:
5827  if (++NumGPRsUsed <= NumGPRs)
5828  continue;
5829  break;
5830  case MVT::v4i32:
5831  case MVT::v8i16:
5832  case MVT::v16i8:
5833  case MVT::v2f64:
5834  case MVT::v2i64:
5835  case MVT::v1i128:
5836  case MVT::f128:
5837  if (++NumVRsUsed <= NumVRs)
5838  continue;
5839  break;
5840  case MVT::v4f32:
5841  if (++NumVRsUsed <= NumVRs)
5842  continue;
5843  break;
5844  case MVT::f32:
5845  case MVT::f64:
5846  if (++NumFPRsUsed <= NumFPRs)
5847  continue;
5848  break;
5849  }
5850  HasParameterArea = true;
5851  }
5852  }
5853 
5854  /* Respect alignment of argument on the stack. */
5855  auto Alignment =
5856  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5857  NumBytes = alignTo(NumBytes, Alignment);
5858 
5859  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5860  if (Flags.isInConsecutiveRegsLast())
5861  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5862  }
5863 
5864  unsigned NumBytesActuallyUsed = NumBytes;
5865 
5866  // In the old ELFv1 ABI,
5867  // the prolog code of the callee may store up to 8 GPR argument registers to
5868  // the stack, allowing va_start to index over them in memory if it is varargs.
5869  // Because we cannot tell if this is needed on the caller side, we have to
5870  // conservatively assume that it is needed. As such, make sure we have at
5871  // least enough stack space for the caller to store the 8 GPRs.
5872  // In the ELFv2 ABI, we allocate the parameter area iff a callee
5873  // really requires memory operands, e.g. a vararg function.
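  // (E.g. on ELFv1, whenever a parameter area is required the minimum is
  // 48 + 8 * 8 = 112 bytes.)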
5874  if (HasParameterArea)
5875  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5876  else
5877  NumBytes = LinkageSize;
5878 
5879  // Tail call needs the stack to be aligned.
5880  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5881  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5882 
5883  int SPDiff = 0;
5884 
5885  // Calculate by how many bytes the stack has to be adjusted in case of tail
5886  // call optimization.
5887  if (!IsSibCall)
5888  SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
5889 
5890  // To protect arguments on the stack from being clobbered in a tail call,
5891  // force all the loads to happen before doing any other lowering.
5892  if (CFlags.IsTailCall)
5893  Chain = DAG.getStackArgumentTokenFactor(Chain);
5894 
5895  // Adjust the stack pointer for the new arguments...
5896  // These operations are automatically eliminated by the prolog/epilog pass
5897  if (!IsSibCall)
5898  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5899  SDValue CallSeqStart = Chain;
5900 
5901  // Load the return address and frame pointer so they can be moved somewhere
5902  // else later.
5903  SDValue LROp, FPOp;
5904  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5905 
5906  // Set up a copy of the stack pointer for use loading and storing any
5907  // arguments that may not fit in the registers available for argument
5908  // passing.
5909  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5910 
5911  // Figure out which arguments are going to go in registers, and which in
5912  // memory. Also, if this is a vararg function, floating point operations
5913  // must be stored to our stack, and loaded into integer regs as well, if
5914  // any integer regs are available for argument passing.
5915  unsigned ArgOffset = LinkageSize;
5916 
5918  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5919 
5920  SmallVector<SDValue, 8> MemOpChains;
5921  for (unsigned i = 0; i != NumOps; ++i) {
5922  SDValue Arg = OutVals[i];
5923  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5924  EVT ArgVT = Outs[i].VT;
5925  EVT OrigVT = Outs[i].ArgVT;
5926 
5927  // PtrOff will be used to store the current argument to the stack if a
5928  // register cannot be found for it.
5929  SDValue PtrOff;
5930 
5931  // We re-align the argument offset for each argument, except when using the
5932  // fast calling convention, when we need to make sure we do that only when
5933  // we'll actually use a stack slot.
5934  auto ComputePtrOff = [&]() {
5935  /* Respect alignment of argument on the stack. */
5936  auto Alignment =
5937  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5938  ArgOffset = alignTo(ArgOffset, Alignment);
5939 
5940  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5941 
5942  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5943  };
5944 
5945  if (!IsFastCall) {
5946  ComputePtrOff();
5947 
5948  /* Compute GPR index associated with argument offset. */
5949  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5950  GPR_idx = std::min(GPR_idx, NumGPRs);
5951  }
5952 
5953  // Promote integers to 64-bit values.
5954  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5955  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5956  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5957  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5958  }
5959 
5960  // FIXME memcpy is used way more than necessary. Correctness first.
5961  // Note: "by value" is code for passing a structure by value, not
5962  // basic types.
5963  if (Flags.isByVal()) {
5964  // Note: Size includes alignment padding, so
5965  // struct x { short a; char b; }
5966  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5967  // These are the proper values we need for right-justifying the
5968  // aggregate in a parameter register.
5969  unsigned Size = Flags.getByValSize();
5970 
5971  // An empty aggregate parameter takes up no storage and no
5972  // registers.
5973  if (Size == 0)
5974  continue;
5975 
5976  if (IsFastCall)
5977  ComputePtrOff();
5978 
5979  // All aggregates smaller than 8 bytes must be passed right-justified.
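  // E.g. on big endian a 4-byte aggregate lives in bytes 4..7 of its
  // doubleword slot, so an extending load into the low bits of a GPR
  // already yields the right-justified value.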
5980  if (Size==1 || Size==2 || Size==4) {
5981  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5982  if (GPR_idx != NumGPRs) {
5983  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5984  MachinePointerInfo(), VT);
5985  MemOpChains.push_back(Load.getValue(1));
5986  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5987 
5988  ArgOffset += PtrByteSize;
5989  continue;
5990  }
5991  }
5992 
5993  if (GPR_idx == NumGPRs && Size < 8) {
5994  SDValue AddPtr = PtrOff;
5995  if (!isLittleEndian) {
5996  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5997  PtrOff.getValueType());
5998  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5999  }
6000  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6001  CallSeqStart,
6002  Flags, DAG, dl);
6003  ArgOffset += PtrByteSize;
6004  continue;
6005  }
6006  // Copy entire object into memory. There are cases where gcc-generated
6007  // code assumes it is there, even if it could be put entirely into
6008  // registers. (This is not what the doc says.)
6009 
6010  // FIXME: The above statement is likely due to a misunderstanding of the
6011  // documents. All arguments must be copied into the parameter area BY
6012  // THE CALLEE in the event that the callee takes the address of any
6013  // formal argument. That has not yet been implemented. However, it is
6014  // reasonable to use the stack area as a staging area for the register
6015  // load.
6016 
6017  // Skip this for small aggregates, as we will use the same slot for a
6018  // right-justified copy, below.
6019  if (Size >= 8)
6020  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6021  CallSeqStart,
6022  Flags, DAG, dl);
6023 
6024  // When a register is available, pass a small aggregate right-justified.
6025  if (Size < 8 && GPR_idx != NumGPRs) {
6026  // The easiest way to get this right-justified in a register
6027  // is to copy the structure into the rightmost portion of a
6028  // local variable slot, then load the whole slot into the
6029  // register.
6030  // FIXME: The memcpy seems to produce pretty awful code for
6031  // small aggregates, particularly for packed ones.
6032  // FIXME: It would be preferable to use the slot in the
6033  // parameter save area instead of a new local variable.
6034  SDValue AddPtr = PtrOff;
6035  if (!isLittleEndian) {
6036  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6037  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6038  }
6039  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6040  CallSeqStart,
6041  Flags, DAG, dl);
6042 
6043  // Load the slot into the register.
6044  SDValue Load =
6045  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6046  MemOpChains.push_back(Load.getValue(1));
6047  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6048 
6049  // Done with this argument.
6050  ArgOffset += PtrByteSize;
6051  continue;
6052  }
6053 
6054  // For aggregates larger than PtrByteSize, copy the pieces of the
6055  // object that fit into registers from the parameter save area.
6056  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6057  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6058  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6059  if (GPR_idx != NumGPRs) {
6060  SDValue Load =
6061  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6062  MemOpChains.push_back(Load.getValue(1));
6063  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6064  ArgOffset += PtrByteSize;
6065  } else {
6066  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6067  break;
6068  }
6069  }
6070  continue;
6071  }
6072 
6073  switch (Arg.getSimpleValueType().SimpleTy) {
6074  default: llvm_unreachable("Unexpected ValueType for argument!");
6075  case MVT::i1:
6076  case MVT::i32:
6077  case MVT::i64:
6078  if (Flags.isNest()) {
6079  // The 'nest' parameter, if any, is passed in R11.
6080  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6081  break;
6082  }
6083 
6084  // These can be scalar arguments or elements of an integer array type
6085  // passed directly. Clang may use those instead of "byval" aggregate
6086  // types to avoid forcing arguments to memory unnecessarily.
6087  if (GPR_idx != NumGPRs) {
6088  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6089  } else {
6090  if (IsFastCall)
6091  ComputePtrOff();
6092 
6093  assert(HasParameterArea &&
6094  "Parameter area must exist to pass an argument in memory.");
6095  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6096  true, CFlags.IsTailCall, false, MemOpChains,
6097  TailCallArguments, dl);
6098  if (IsFastCall)
6099  ArgOffset += PtrByteSize;
6100  }
6101  if (!IsFastCall)
6102  ArgOffset += PtrByteSize;
6103  break;
6104  case MVT::f32:
6105  case MVT::f64: {
6106  // These can be scalar arguments or elements of a float array type
6107  // passed directly. The latter are used to implement ELFv2 homogeneous
6108  // float aggregates.
6109 
6110  // Named arguments go into FPRs first, and once they overflow, the
6111  // remaining arguments go into GPRs and then the parameter save area.
6112  // Unnamed arguments for vararg functions always go to GPRs and
6113  // then the parameter save area. For now, put all arguments to vararg
6114  // routines always in both locations (FPR *and* GPR or stack slot).
6115  bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6116  bool NeededLoad = false;
6117 
6118  // First load the argument into the next available FPR.
6119  if (FPR_idx != NumFPRs)
6120  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6121 
6122  // Next, load the argument into GPR or stack slot if needed.
6123  if (!NeedGPROrStack)
6124  ;
6125  else if (GPR_idx != NumGPRs && !IsFastCall) {
6126  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6127  // once we support fp <-> gpr moves.
6128 
6129  // In the non-vararg case, this can only ever happen in the
6130  // presence of f32 array types, since otherwise we never run
6131  // out of FPRs before running out of GPRs.
6132  SDValue ArgVal;
6133 
6134  // Double values are always passed in a single GPR.
6135  if (Arg.getValueType() != MVT::f32) {
6136  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6137 
6138  // Non-array float values are extended and passed in a GPR.
6139  } else if (!Flags.isInConsecutiveRegs()) {
6140  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6141  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6142 
6143  // If we have an array of floats, we collect every odd element
6144  // together with its predecessor into one GPR.
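  // E.g. for `float A[4]` overflowing into GPRs, A[0]/A[1] share one
  // doubleword and A[2]/A[3] the next; the BUILD_PAIR below swaps the
  // halves as needed for the target's endianness.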
6145  } else if (ArgOffset % PtrByteSize != 0) {
6146  SDValue Lo, Hi;
6147  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6148  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6149  if (!isLittleEndian)
6150  std::swap(Lo, Hi);
6151  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6152 
6153  // The final element, if even, goes into the first half of a GPR.
6154  } else if (Flags.isInConsecutiveRegsLast()) {
6155  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6156  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6157  if (!isLittleEndian)
6158  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6159  DAG.getConstant(32, dl, MVT::i32));
6160 
6161  // Non-final even elements are skipped; they will be handled
6162  // together with the subsequent argument on the next go-around.
6163  } else
6164  ArgVal = SDValue();
6165 
6166  if (ArgVal.getNode())
6167  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6168  } else {
6169  if (IsFastCall)
6170  ComputePtrOff();
6171 
6172  // Single-precision floating-point values are mapped to the
6173  // second (rightmost) word of the stack doubleword.
6174  if (Arg.getValueType() == MVT::f32 &&
6175  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6176  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6177  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6178  }
6179 
6180  assert(HasParameterArea &&
6181  "Parameter area must exist to pass an argument in memory.");
6182  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6183  true, CFlags.IsTailCall, false, MemOpChains,
6184  TailCallArguments, dl);
6185 
6186  NeededLoad = true;
6187  }
6188  // When passing an array of floats, the array occupies consecutive
6189  // space in the argument area; only round up to the next doubleword
6190  // at the end of the array. Otherwise, each float takes 8 bytes.
6191  if (!IsFastCall || NeededLoad) {
6192  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6193  Flags.isInConsecutiveRegs()) ? 4 : 8;
6194  if (Flags.isInConsecutiveRegsLast())
6195  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6196  }
6197  break;
6198  }
6199  case MVT::v4f32:
6200  case MVT::v4i32:
6201  case MVT::v8i16:
6202  case MVT::v16i8:
6203  case MVT::v2f64:
6204  case MVT::v2i64:
6205  case MVT::v1i128:
6206  case MVT::f128:
6207  // These can be scalar arguments or elements of a vector array type
6208  // passed directly. The latter are used to implement ELFv2 homogeneous
6209  // vector aggregates.
6210 
6211  // For a varargs call, named arguments go into VRs or on the stack as
6212  // usual; unnamed arguments always go to the stack or the corresponding
6213  // GPRs when within range. For now, we always put the value in both
6214  // locations (or even all three).
6215  if (CFlags.IsVarArg) {
6216  assert(HasParameterArea &&
6217  "Parameter area must exist if we have a varargs call.");
6218  // We could elide this store in the case where the object fits
6219  // entirely in R registers. Maybe later.
6220  SDValue Store =
6221  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6222  MemOpChains.push_back(Store);
6223  if (VR_idx != NumVRs) {
6224  SDValue Load =
6225  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6226  MemOpChains.push_back(Load.getValue(1));
6227  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6228  }
6229  ArgOffset += 16;
6230  for (unsigned i=0; i<16; i+=PtrByteSize) {
6231  if (GPR_idx == NumGPRs)
6232  break;
6233  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6234  DAG.getConstant(i, dl, PtrVT));
6235  SDValue Load =
6236  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6237  MemOpChains.push_back(Load.getValue(1));
6238  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6239  }
6240  break;
6241  }
6242 
6243  // Non-varargs Altivec params go into VRs or on the stack.
6244  if (VR_idx != NumVRs) {
6245  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6246  } else {
6247  if (IsFastCall)
6248  ComputePtrOff();
6249 
6250  assert(HasParameterArea &&
6251  "Parameter area must exist to pass an argument in memory.");
6252  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6253  true, CFlags.IsTailCall, true, MemOpChains,
6254  TailCallArguments, dl);
6255  if (IsFastCall)
6256  ArgOffset += 16;
6257  }
6258 
6259  if (!IsFastCall)
6260  ArgOffset += 16;
6261  break;
6262  }
6263  }
6264 
6265  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6266  "mismatch in size of parameter area");
6267  (void)NumBytesActuallyUsed;
6268 
6269  if (!MemOpChains.empty())
6270  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6271 
6272  // Check if this is an indirect call (MTCTR/BCTRL).
6273  // See prepareDescriptorIndirectCall and buildCallOperands for more
6274  // information about calls through function pointers in the 64-bit SVR4 ABI.
6275  if (CFlags.IsIndirect) {
6276  // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6277  // caller in the TOC save area.
6278  if (isTOCSaveRestoreRequired(Subtarget)) {
6279  assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6280  // Load r2 into a virtual register and store it to the TOC save area.
6281  setUsesTOCBasePtr(DAG);
6282  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6283  // TOC save area offset.
6284  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6285  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6286  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6287  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6288  MachinePointerInfo::getStack(
6289  DAG.getMachineFunction(), TOCSaveOffset));
6290  }
6291  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6292  // This does not mean the MTCTR instruction must use R12; it's easier
6293  // to model this as an extra parameter, so do that.
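  // (The callee's ELFv2 global entry point recomputes its own TOC pointer
  // from r12, which is why the address must be there.)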
6294  if (isELFv2ABI && !CFlags.IsPatchPoint)
6295  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6296  }
6297 
6298  // Build a sequence of copy-to-reg nodes chained together with token chain
6299  // and flag operands which copy the outgoing args into the appropriate regs.
6300  SDValue InFlag;
6301  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6302  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6303  RegsToPass[i].second, InFlag);
6304  InFlag = Chain.getValue(1);
6305  }
6306 
6307  if (CFlags.IsTailCall && !IsSibCall)
6308  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6309  TailCallArguments);
6310 
6311  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6312  Callee, SPDiff, NumBytes, Ins, InVals, CB);
6313 }
6314 
6315 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6316  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6317  CCState &State) {
6318 
6319  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6320  State.getMachineFunction().getSubtarget());
6321  const bool IsPPC64 = Subtarget.isPPC64();
6322  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6323  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6324 
6325  if (ValVT.isVector() && !State.getMachineFunction()
6326  .getTarget()
6327  .Options.EnableAIXExtendedAltivecABI)
6328  report_fatal_error("the default Altivec AIX ABI is not yet supported");
6329 
6330  if (ValVT == MVT::f128)
6331  report_fatal_error("f128 is unimplemented on AIX.");
6332 
6333  if (ArgFlags.isNest())
6334  report_fatal_error("Nest arguments are unimplemented.");
6335 
6336  static const MCPhysReg GPR_32[] = {// 32-bit registers.
6337  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6338  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6339  static const MCPhysReg GPR_64[] = {// 64-bit registers.
6340  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6341  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6342 
6343  static const MCPhysReg VR[] = {// Vector registers.
6344  PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6345  PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6346  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6347 
6348  if (ArgFlags.isByVal()) {
6349  if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6350  report_fatal_error("Pass-by-value arguments with alignment greater than "
6351  "register width are not supported.");
6352 
6353  const unsigned ByValSize = ArgFlags.getByValSize();
6354 
6355  // An empty aggregate parameter takes up no storage and no registers,
6356  // but needs a MemLoc for a stack slot for the formal arguments side.
6357  if (ByValSize == 0) {
6358  State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6359  State.getNextStackOffset(), RegVT,
6360  LocInfo));
6361  return false;
6362  }
6363 
6364  const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6365  unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6366  for (const unsigned E = Offset + StackSize; Offset < E;
6367  Offset += PtrAlign.value()) {
6368  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6369  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6370  else {
6371  State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6372  Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6373  LocInfo));
6374  break;
6375  }
6376  }
6377  return false;
6378  }
6379 
6380  // Arguments always reserve parameter save area.
6381  switch (ValVT.SimpleTy) {
6382  default:
6383  report_fatal_error("Unhandled value type for argument.");
6384  case MVT::i64:
6385  // i64 arguments should have been split to i32 for PPC32.
6386  assert(IsPPC64 && "PPC32 should have split i64 values.");
6387  LLVM_FALLTHROUGH;
6388  case MVT::i1:
6389  case MVT::i32: {
6390  const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6391  // AIX integer arguments are always passed in register width.
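  // E.g. a signed i32 argument on 64-bit AIX is sign-extended to 64 bits
  // and still reserves a full 8-byte slot in the parameter save area.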
6392  if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6393  LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6394  : CCValAssign::LocInfo::ZExt;
6395  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6396  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6397  else
6398  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6399 
6400  return false;
6401  }
6402  case MVT::f32:
6403  case MVT::f64: {
6404  // Parameter save area (PSA) is reserved even if the float passes in fpr.
6405  const unsigned StoreSize = LocVT.getStoreSize();
6406  // Floats are always 4-byte aligned in the PSA on AIX.
6407  // This includes f64 in 64-bit mode for ABI compatibility.
6408  const unsigned Offset =
6409  State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6410  unsigned FReg = State.AllocateReg(FPR);
6411  if (FReg)
6412  State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6413 
6414  // Reserve and initialize GPRs or initialize the PSA as required.
6415  for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6416  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6417  assert(FReg && "An FPR should be available when a GPR is reserved.");
6418  if (State.isVarArg()) {
6419  // Successfully reserved GPRs are only initialized for vararg calls.
6420  // Custom handling is required for:
6421  // f64 in PPC32 needs to be split into 2 GPRs.
6422  // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6423  State.addLoc(
6424  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6425  }
6426  } else {
6427  // If there are insufficient GPRs, the PSA needs to be initialized.
6428  // Initialization occurs even if an FPR was initialized for
6429  // compatibility with the AIX XL compiler. The full memory for the
6430  // argument will be initialized even if a prior word is saved in GPR.
6431  // A custom memLoc is used when the argument also passes in FPR so
6432  // that the callee handling can skip over it easily.
6433  State.addLoc(
6434  FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6435  LocInfo)
6436  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6437  break;
6438  }
6439  }
6440 
6441  return false;
6442  }
6443  case MVT::v4f32:
6444  case MVT::v4i32:
6445  case MVT::v8i16:
6446  case MVT::v16i8:
6447  case MVT::v2i64:
6448  case MVT::v2f64:
6449  case MVT::v1i128: {
6450  if (State.isVarArg())
6452  "variadic arguments for vector types are unimplemented for AIX");
6453 
6454  if (unsigned VReg = State.AllocateReg(VR))
6455  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6456  else {
6458  "passing vector parameters to the stack is unimplemented for AIX");
6459  }
6460  return false;
6461  }
6462  }
6463  return true;
6464 }
6465 
6466 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6467  bool IsPPC64) {
6468  assert((IsPPC64 || SVT != MVT::i64) &&
6469  "i64 should have been split for 32-bit codegen.");
6470 
6471  switch (SVT) {
6472  default:
6473  report_fatal_error("Unexpected value type for formal argument");
6474  case MVT::i1:
6475  case MVT::i32:
6476  case MVT::i64:
6477  return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6478  case MVT::f32:
6479  return &PPC::F4RCRegClass;
6480  case MVT::f64:
6481  return &PPC::F8RCRegClass;
6482  case MVT::v4f32:
6483  case MVT::v4i32:
6484  case MVT::v8i16:
6485  case MVT::v16i8:
6486  case MVT::v2i64:
6487  case MVT::v2f64:
6488  case MVT::v1i128:
6489  return &PPC::VRRCRegClass;
6490  }
6491 }
6492 
6494  SelectionDAG &DAG, SDValue ArgValue,
6495  MVT LocVT, const SDLoc &dl) {
6496  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6497  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6498 
6499  if (Flags.isSExt())
6500  ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6501  DAG.getValueType(ValVT));
6502  else if (Flags.isZExt())
6503  ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6504  DAG.getValueType(ValVT));
6505 
6506  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6507 }
6508 
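// Maps an argument GPR to the offset of its home location in the caller's
// parameter save area. E.g. on 64-bit AIX the linkage area is 48 bytes, so
// X5 maps to 48 + 8 * (X5 - X3) = 64.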
6509 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6510  const unsigned LASize = FL->getLinkageSize();
6511 
6512  if (PPC::GPRCRegClass.contains(Reg)) {
6513  assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6514  "Reg must be a valid argument register!");
6515  return LASize + 4 * (Reg - PPC::R3);
6516  }
6517 
6518  if (PPC::G8RCRegClass.contains(Reg)) {
6519  assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6520  "Reg must be a valid argument register!");
6521  return LASize + 8 * (Reg - PPC::X3);
6522  }
6523 
6524  llvm_unreachable("Only general purpose registers expected.");
6525 }
6526 
6527 // AIX ABI Stack Frame Layout:
6528 //
6529 // Low Memory +--------------------------------------------+
6530 // SP +---> | Back chain | ---+
6531 // | +--------------------------------------------+ |
6532 // | | Saved Condition Register | |
6533 // | +--------------------------------------------+ |
6534 // | | Saved Linkage Register | |
6535 // | +--------------------------------------------+ | Linkage Area
6536 // | | Reserved for compilers | |
6537 // | +--------------------------------------------+ |
6538 // | | Reserved for binders | |
6539 // | +--------------------------------------------+ |
6540 // | | Saved TOC pointer | ---+
6541 // | +--------------------------------------------+
6542 // | | Parameter save area |
6543 // | +--------------------------------------------+
6544 // | | Alloca space |
6545 // | +--------------------------------------------+
6546 // | | Local variable space |
6547 // | +--------------------------------------------+
6548 // | | Float/int conversion temporary |
6549 // | +--------------------------------------------+
6550 // | | Save area for AltiVec registers |
6551 // | +--------------------------------------------+
6552 // | | AltiVec alignment padding |
6553 // | +--------------------------------------------+
6554 // | | Save area for VRSAVE register |
6555 // | +--------------------------------------------+
6556 // | | Save area for General Purpose registers |
6557 // | +--------------------------------------------+
6558 // | | Save area for Floating Point registers |
6559 // | +--------------------------------------------+
6560 // +---- | Back chain |
6561 // High Memory +--------------------------------------------+
6562 //
6563 // Specifications:
6564 // AIX 7.2 Assembler Language Reference
6565 // Subroutine linkage convention
6566 
6567 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6568  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6569  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6570  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6571 
6572  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6573  CallConv == CallingConv::Fast) &&
6574  "Unexpected calling convention!");
6575 
6576  if (getTargetMachine().Options.GuaranteedTailCallOpt)
6577  report_fatal_error("Tail call support is unimplemented on AIX.");
6578 
6579  if (useSoftFloat())
6580  report_fatal_error("Soft float support is unimplemented on AIX.");
6581 
6582  const PPCSubtarget &Subtarget =
6583  static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6584 
6585  const bool IsPPC64 = Subtarget.isPPC64();
6586  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6587 
6588  // Assign locations to all of the incoming arguments.
6590  MachineFunction &MF = DAG.getMachineFunction();
6591  MachineFrameInfo &MFI = MF.getFrameInfo();
6592  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6593  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6594 
6595  const EVT PtrVT = getPointerTy(MF.getDataLayout());
6596  // Reserve space for the linkage area on the stack.
6597  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6598  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6599  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6600 
6601  SmallVector<SDValue, 8> MemOps;
6602 
6603  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6604  CCValAssign &VA = ArgLocs[I++];
6605  MVT LocVT = VA.getLocVT();
6606  ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6607  if (VA.isMemLoc() && VA.getValVT().isVector())
6608  report_fatal_error(
6609  "passing vector parameters to the stack is unimplemented for AIX");
6610 
6611  // For compatibility with the AIX XL compiler, the float args in the
6612  // parameter save area are initialized even if the argument is available
6613  // in a register. The caller is required to initialize both the register
6614  // and memory; however, the callee can choose to expect it in either.
6615  // The memloc is dismissed here because the argument is retrieved from
6616  // the register.
6617  if (VA.isMemLoc() && VA.needsCustom())
6618  continue;
6619 
6620  if (VA.isRegLoc()) {
6621  if (VA.getValVT().isScalarInteger())
6622  FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6623  else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
6624  FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
6625  ? PPCFunctionInfo::ShortFloatPoint
6626  : PPCFunctionInfo::LongFloatPoint);
6627  }
6628 
6629  if (Flags.isByVal() && VA.isMemLoc()) {
6630  const unsigned Size =
6631  alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
6632  PtrByteSize);
6633  const int FI = MF.getFrameInfo().CreateFixedObject(
6634  Size, VA.getLocMemOffset(), /* IsImmutable */ false,
6635  /* IsAliased */ true);
6636  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6637  InVals.push_back(FIN);
6638 
6639  continue;
6640  }
6641 
6642  if (Flags.isByVal()) {
6643  assert(VA.isRegLoc() && "MemLocs should already be handled.");
6644 
6645  const MCPhysReg ArgReg = VA.getLocReg();
6646  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
6647 
6648  if (Flags.getNonZeroByValAlign() > PtrByteSize)
6649  report_fatal_error("Over aligned byvals not supported yet.");
6650 
6651  const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
6652  const int FI = MF.getFrameInfo().CreateFixedObject(
6653  StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
6654  /* IsAliased */ true);
6655  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6656  InVals.push_back(FIN);
6657 
6658  // Add live ins for all the RegLocs for the same ByVal.
6659  const TargetRegisterClass *RegClass =
6660  IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6661 
6662  auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
6663  unsigned Offset) {
6664  const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
6665  // Since the caller's side has left-justified the aggregate in the
6666  // register, we can simply store the entire register into the stack
6667  // slot.
6668  SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6669  // The store to the fixed-stack object is needed because accessing a
6670  // field of the ByVal will use a GEP and load. Ideally we will optimize
6671  // to extracting the value from the register directly, and elide the
6672  // stores when the argument's address is not taken, but that will need to
6673  // be future work.
6674  SDValue Store = DAG.getStore(
6675  CopyFrom.getValue(1), dl, CopyFrom,
6676  DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
6677  MachinePointerInfo::getFixedStack(MF, FI, Offset));
6678 
6679  MemOps.push_back(Store);
6680  };
6681 
6682  unsigned Offset = 0;
6683  HandleRegLoc(VA.getLocReg(), Offset);
6684  Offset += PtrByteSize;
6685  for (; Offset != StackSize && ArgLocs[I].isRegLoc();
6686  Offset += PtrByteSize) {
6687  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6688  "RegLocs should be for ByVal argument.");
6689 
6690  const CCValAssign RL = ArgLocs[I++];
6691  HandleRegLoc(RL.getLocReg(), Offset);
6693  }
6694 
6695  if (Offset != StackSize) {
6696  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6697  "Expected MemLoc for remaining bytes.");
6698  assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
6699  // Consume the MemLoc. The InVal has already been emitted, so nothing
6700  // more needs to be done.
6701  ++I;
6702  }
6703 
6704  continue;
6705  }
6706 
6707  EVT ValVT = VA.getValVT();
6708  if (VA.isRegLoc() && !VA.needsCustom()) {
6709  MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
6710  unsigned VReg =
6711  MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
6712  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6713  if (ValVT.isScalarInteger() &&
6714  (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
6715  ArgValue =
6716  truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
6717  }
6718  InVals.push_back(ArgValue);
6719  continue;
6720  }
6721  if (VA.isMemLoc()) {
6722  const unsigned LocSize = LocVT.getStoreSize();
6723  const unsigned ValSize = ValVT.getStoreSize();
6724  assert((ValSize <= LocSize) &&
6725  "Object size is larger than size of MemLoc");
6726  int CurArgOffset = VA.getLocMemOffset();
6727  // Objects are right-justified because AIX is big-endian.
6728  if (LocSize > ValSize)
6729  CurArgOffset += LocSize - ValSize;
6730  // Potential tail calls could cause overwriting of argument stack slots.
6731  const bool IsImmutable =
6732  !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6733  (CallConv == CallingConv::Fast));
6734  int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6735  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6736  SDValue ArgValue =
6737  DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6738  InVals.push_back(ArgValue);
6739  continue;
6740  }
6741  }
6742 
6743  // On AIX a minimum of 8 words is saved to the parameter save area.
6744  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
6745  // Area that is at least reserved in the caller of this function.
6746  unsigned CallerReservedArea =
6747  std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
6748 
6749  // Set the size that is at least reserved in caller of this function. Tail
6750  // call optimized function's reserved stack space needs to be aligned so
6751  // that taking the difference between two stack areas will result in an
6752  // aligned stack.
6753  CallerReservedArea =
6754  EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
6755  FuncInfo->setMinReservedArea(CallerReservedArea);
6756 
6757  if (isVarArg) {
6758  FuncInfo->setVarArgsFrameIndex(
6759  MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
6760  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
6761 
6762  static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6763  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6764 
6765  static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6766  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6767  const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
6768 
6769  // The fixed integer arguments of a variadic function are stored to the
6770  // VarArgsFrameIndex on the stack so that they may be loaded by
6771  // dereferencing the result of va_next.
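// Illustrative sketch (editor's, with a hypothetical signature): for a
// variadic callee such as int f(int a, int b, ...) under the 64-bit ABI,
// the two fixed arguments occupy X3 and X4, so GPRIndex below starts at
// (NextStackOffset - LinkageSize) / 8 == 2 and the loop spills X5..X10
// into consecutive pointer-sized slots starting at VarArgsFrameIndex.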
6772  for (unsigned GPRIndex =
6773  (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
6774  GPRIndex < NumGPArgRegs; ++GPRIndex) {
6775 
6776  const unsigned VReg =
6777  IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
6778  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
6779 
6780  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
6781  SDValue Store =
6782  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
6783  MemOps.push_back(Store);
6784  // Increment the address for the next argument to store.
6785  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
6786  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
6787  }
6788  }
6789 
6790  if (!MemOps.empty())
6791  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
6792 
6793  return Chain;
6794 }
6795 
6796 SDValue PPCTargetLowering::LowerCall_AIX(
6797  SDValue Chain, SDValue Callee, CallFlags CFlags,
6798  const SmallVectorImpl<ISD::OutputArg> &Outs,
6799  const SmallVectorImpl<SDValue> &OutVals,
6800  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6801  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6802  const CallBase *CB) const {
6803  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
6804  // AIX ABI stack frame layout.
6805 
6806  assert((CFlags.CallConv == CallingConv::C ||
6807  CFlags.CallConv == CallingConv::Cold ||
6808  CFlags.CallConv == CallingConv::Fast) &&
6809  "Unexpected calling convention!");
6810 
6811  if (CFlags.IsPatchPoint)
6812  report_fatal_error("This call type is unimplemented on AIX.");
6813 
6814  const PPCSubtarget& Subtarget =
6815  static_cast<const PPCSubtarget&>(DAG.getSubtarget());
6816 
6817  MachineFunction &MF = DAG.getMachineFunction();
6818  SmallVector<CCValAssign, 16> ArgLocs;
6819  CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
6820  *DAG.getContext());
6821 
6822  // Reserve space for the linkage save area (LSA) on the stack.
6823  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
6824  // [SP][CR][LR][2 x reserved][TOC].
6825  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
6826  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6827  const bool IsPPC64 = Subtarget.isPPC64();
6828  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
6829  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6830  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6831  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
6832 
6833  // The prolog code of the callee may store up to 8 GPR argument registers to
6834  // the stack, allowing va_start to index over them in memory if the callee
6835  // is variadic.
6836  // Because we cannot tell if this is needed on the caller side, we have to
6837  // conservatively assume that it is needed. As such, make sure we have at
6838  // least enough stack space for the caller to store the 8 GPRs.
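// Worked example (editor's illustration): under the 64-bit ABI the linkage
// area is 48 bytes and the minimum parameter save area is 8 * 8 = 64 bytes,
// so NumBytes is at least 48 + 64 = 112 for every call, even when fewer
// than eight argument registers are used.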
6839  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
6840  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
6841  CCInfo.getNextStackOffset());
6842 
6843  // Adjust the stack pointer for the new arguments...
6844  // These operations are automatically eliminated by the prolog/epilog pass.
6845  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6846  SDValue CallSeqStart = Chain;
6847 
6848  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6849  SmallVector<SDValue, 8> MemOpChains;
6850 
6851  // Set up a copy of the stack pointer for loading and storing any
6852  // arguments that may not fit in the registers available for argument
6853  // passing.
6854  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
6855  : DAG.getRegister(PPC::R1, MVT::i32);
6856 
6857  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
6858  const unsigned ValNo = ArgLocs[I].getValNo();
6859  SDValue Arg = OutVals[ValNo];
6860  ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
6861 
6862  if (Flags.isByVal()) {
6863  const unsigned ByValSize = Flags.getByValSize();
6864 
6865  // Nothing to do for zero-sized ByVals on the caller side.
6866  if (!ByValSize) {
6867  ++I;
6868  continue;
6869  }
6870 
6871  auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
6872  return DAG.getExtLoad(
6873  ISD::ZEXTLOAD, dl, PtrVT, Chain,
6874  (LoadOffset != 0)
6875  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6876  : Arg,
6877  MachinePointerInfo(), VT);
6878  };
6879 
6880  unsigned LoadOffset = 0;
6881 
6882  // Initialize the registers that are fully occupied by the by-val argument.
6883  while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
6884  SDValue Load = GetLoad(PtrVT, LoadOffset);
6885  MemOpChains.push_back(Load.getValue(1));
6886  LoadOffset += PtrByteSize;
6887  const CCValAssign &ByValVA = ArgLocs[I++];
6888  assert(ByValVA.getValNo() == ValNo &&
6889  "Unexpected location for pass-by-value argument.");
6890  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
6891  }
6892 
6893  if (LoadOffset == ByValSize)
6894  continue;
6895 
6896  // There must be one more loc to handle the remainder.
6897  assert(ArgLocs[I].getValNo() == ValNo &&
6898  "Expected additional location for by-value argument.");
6899 
6900  if (ArgLocs[I].isMemLoc()) {
6901  assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
6902  const CCValAssign &ByValVA = ArgLocs[I++];
6903  ISD::ArgFlagsTy MemcpyFlags = Flags;
6904  // Only memcpy the bytes that aren't passed in registers.
6905  MemcpyFlags.setByValSize(ByValSize - LoadOffset);
6906  Chain = CallSeqStart = createMemcpyOutsideCallSeq(
6907  (LoadOffset != 0)
6908  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6909  : Arg,
6910  DAG.getObjectPtrOffset(dl, StackPtr,
6911  TypeSize::Fixed(ByValVA.getLocMemOffset())),
6912  CallSeqStart, MemcpyFlags, DAG, dl);
6913  continue;
6914  }
6915 
6916  // Initialize the final register residue.
6917  // Any residue that occupies the final by-val arg register must be
6918  // left-justified on AIX. Loads must be a power-of-2 size and cannot be
6919  // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
6920  // 2 and 1 byte loads.
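// Worked example (editor's illustration, 64-bit ABI): a 7-byte residue is
// loaded as PowerOf2Floor(7) = 4 bytes, then 2, then 1. Bytes becomes 4, 6,
// and 7, so the loads are shifted left by 64 - 8 * Bytes = 32, 16, and 8
// bits respectively and OR'ed together, leaving the residue left-justified
// in the final register.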
6921  const unsigned ResidueBytes = ByValSize % PtrByteSize;
6922  assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
6923  "Unexpected register residue for by-value argument.");
6924  SDValue ResidueVal;
6925  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
6926  const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
6927  const MVT VT =
6928  N == 1 ? MVT::i8
6929  : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
6930  SDValue Load = GetLoad(VT, LoadOffset);
6931  MemOpChains.push_back(Load.getValue(1));
6932  LoadOffset += N;
6933  Bytes += N;
6934 
6935  // By-val arguments are passed left-justified in register.
6936  // Every load here needs to be shifted, otherwise a full register load
6937  // should have been used.
6938  assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
6939  "Unexpected load emitted during handling of pass-by-value "
6940  "argument.");
6941  unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
6942  EVT ShiftAmountTy =
6943  getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
6944  SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
6945  SDValue ShiftedLoad =
6946  DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
6947  ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
6948  ShiftedLoad)
6949  : ShiftedLoad;
6950  }
6951 
6952  const CCValAssign &ByValVA = ArgLocs[I++];
6953  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
6954  continue;
6955  }
6956 
6957  CCValAssign &VA = ArgLocs[I++];
6958  const MVT LocVT = VA.getLocVT();
6959  const MVT ValVT = VA.getValVT();
6960 
6961  if (VA.isMemLoc() && VA.getValVT().isVector())
6962  report_fatal_error(
6963  "passing vector parameters to the stack is unimplemented for AIX");
6964 
6965  switch (VA.getLocInfo()) {
6966  default:
6967  report_fatal_error("Unexpected argument extension type.");
6968  case CCValAssign::Full:
6969  break;
6970  case CCValAssign::ZExt:
6971  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6972  break;
6973  case CCValAssign::SExt:
6974  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6975  break;
6976  }
6977 
6978  if (VA.isRegLoc() && !VA.needsCustom()) {
6979  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6980  continue;
6981  }
6982 
6983  if (VA.isMemLoc()) {
6984  SDValue PtrOff =
6985  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
6986  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6987  MemOpChains.push_back(
6988  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6989 
6990  continue;
6991  }
6992 
6993  // Custom handling is used for GPR initializations for vararg float
6994  // arguments.
6995  assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
6996  ValVT.isFloatingPoint() && LocVT.isInteger() &&
6997  "Unexpected register handling for calling convention.");
6998 
6999  SDValue ArgAsInt =
7000  DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7001 
7002  if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7003  // f32 in 32-bit GPR
7004  // f64 in 64-bit GPR
7005  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7006  else if (Arg.getValueType().getFixedSizeInBits() <
7007  LocVT.getFixedSizeInBits())
7008  // f32 in 64-bit GPR.
7009  RegsToPass.push_back(std::make_pair(
7010  VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7011  else {
7012  // f64 in two 32-bit GPRs
7013  // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7014  assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7015  "Unexpected custom register for argument!");
7016  CCValAssign &GPR1 = VA;
7017  SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7018  DAG.getConstant(32, dl, MVT::i8));
7019  RegsToPass.push_back(std::make_pair(
7020  GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7021 
7022  if (I != E) {
7023  // If only 1 GPR was available, there will only be one custom GPR and
7024  // the argument will also be passed in memory.
7025  CCValAssign &PeekArg = ArgLocs[I];
7026  if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7027  assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7028  CCValAssign &GPR2 = ArgLocs[I++];
7029  RegsToPass.push_back(std::make_pair(
7030  GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7031  }
7032  }
7033  }
7034  }
7035 
7036  if (!MemOpChains.empty())
7037  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7038 
7039  // For indirect calls, we need to save the TOC base to the stack for
7040  // restoration after the call.
7041  if (CFlags.IsIndirect) {
7042  assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7043  const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7044  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7045  const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7046  const unsigned TOCSaveOffset =
7047  Subtarget.getFrameLowering()->getTOCSaveOffset();
7048 
7049  setUsesTOCBasePtr(DAG);
7050  SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7051  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7052  SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7053  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7054  Chain = DAG.getStore(
7055  Val.getValue(1), dl, Val, AddPtr,
7056  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7057  }
7058 
7059  // Build a sequence of copy-to-reg nodes chained together with token chain
7060  // and flag operands which copy the outgoing args into the appropriate regs.
7061  SDValue InFlag;
7062  for (auto Reg : RegsToPass) {
7063  Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7064  InFlag = Chain.getValue(1);
7065  }
7066 
7067  const int SPDiff = 0;
7068  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7069  Callee, SPDiff, NumBytes, Ins, InVals, CB);
7070 }
7071 
7072 bool
7073 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7074  MachineFunction &MF, bool isVarArg,
7075  const SmallVectorImpl<ISD::OutputArg> &Outs,
7076  LLVMContext &Context) const {
7077  SmallVector<CCValAssign, 16> RVLocs;
7078  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7079  return CCInfo.CheckReturn(
7080  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7081  ? RetCC_PPC_Cold
7082  : RetCC_PPC);
7083 }
7084 
7085 SDValue
7086 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7087  bool isVarArg,
7088  const SmallVectorImpl<ISD::OutputArg> &Outs,
7089  const SmallVectorImpl<SDValue> &OutVals,
7090  const SDLoc &dl, SelectionDAG &DAG) const {
7091  SmallVector<CCValAssign, 16> RVLocs;
7092  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7093  *DAG.getContext());
7094  CCInfo.AnalyzeReturn(Outs,
7095  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7096  ? RetCC_PPC_Cold
7097  : RetCC_PPC);
7098 
7099  SDValue Flag;
7100  SmallVector<SDValue, 4> RetOps(1, Chain);
7101 
7102  // Copy the result values into the output registers.
7103  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7104  CCValAssign &VA = RVLocs[i];
7105  assert(VA.isRegLoc() && "Can only return in registers!");
7106 
7107  SDValue Arg = OutVals[RealResIdx];
7108 
7109  switch (VA.getLocInfo()) {
7110  default: llvm_unreachable("Unknown loc info!");
7111  case CCValAssign::Full: break;
7112  case CCValAssign::AExt:
7113  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7114  break;
7115  case CCValAssign::ZExt:
7116  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7117  break;
7118  case CCValAssign::SExt:
7119  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7120  break;
7121  }
7122  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7123  bool isLittleEndian = Subtarget.isLittleEndian();
7124  // Legalize ret f64 -> ret 2 x i32.
7125  SDValue SVal =
7126  DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7127  DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7128  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7129  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7130  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7131  DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7132  Flag = Chain.getValue(1);
7133  VA = RVLocs[++i]; // skip ahead to next loc
7134  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7135  } else
7136  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7137  Flag = Chain.getValue(1);
7138  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7139  }
7140 
7141  RetOps[0] = Chain; // Update chain.
7142 
7143  // Add the flag if we have it.
7144  if (Flag.getNode())
7145  RetOps.push_back(Flag);
7146 
7147  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7148 }
7149 
7150 SDValue
7151 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7152  SelectionDAG &DAG) const {
7153  SDLoc dl(Op);
7154 
7155  // Get the correct type for integers.
7156  EVT IntVT = Op.getValueType();
7157 
7158  // Get the inputs.
7159  SDValue Chain = Op.getOperand(0);
7160  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7161  // Build a DYNAREAOFFSET node.
7162  SDValue Ops[2] = {Chain, FPSIdx};
7163  SDVTList VTs = DAG.getVTList(IntVT);
7164  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7165 }
7166 
7167 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7168  SelectionDAG &DAG) const {
7169  // When we pop the dynamic allocation we need to restore the SP link.
7170  SDLoc dl(Op);
7171 
7172  // Get the correct type for pointers.
7173  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7174 
7175  // Construct the stack pointer operand.
7176  bool isPPC64 = Subtarget.isPPC64();
7177  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7178  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7179 
7180  // Get the operands for the STACKRESTORE.
7181  SDValue Chain = Op.getOperand(0);
7182  SDValue SaveSP = Op.getOperand(1);
7183 
7184  // Load the old link SP.
7185  SDValue LoadLinkSP =
7186  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7187 
7188  // Restore the stack pointer.
7189  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7190 
7191  // Store the old link SP.
7192  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7193 }
7194 
7195 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7196  MachineFunction &MF = DAG.getMachineFunction();
7197  bool isPPC64 = Subtarget.isPPC64();
7198  EVT PtrVT = getPointerTy(MF.getDataLayout());
7199 
7200  // Get the current return address save index, creating it on first use.
7201  // This identifies the fixed slot used to save the incoming link register.
7202  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7203  int RASI = FI->getReturnAddrSaveIndex();
7204 
7205  // If the return address save index hasn't been defined yet, create it.
7206  if (!RASI) {
7207  // Find out the fixed offset of the return address save area.
7208  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7209  // Allocate the frame index for the return address save area.
7210  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7211  // Save the result.
7212  FI->setReturnAddrSaveIndex(RASI);
7213  }
7214  return DAG.getFrameIndex(RASI, PtrVT);
7215 }
7216 
7217 SDValue
7218 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7219  MachineFunction &MF = DAG.getMachineFunction();
7220  bool isPPC64 = Subtarget.isPPC64();
7221  EVT PtrVT = getPointerTy(MF.getDataLayout());
7222 
7223  // Get the current frame pointer save index. The users of this index will
7224  // be primarily DYNALLOC instructions.
7225  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7226  int FPSI = FI->getFramePointerSaveIndex();
7227 
7228  // If the frame pointer save index hasn't been defined yet.
7229  if (!FPSI) {
7230  // Find out what the fix offset of the frame pointer save area.
7231  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7232  // Allocate the frame index for frame pointer save area.
7233  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7234  // Save the result.
7235  FI->setFramePointerSaveIndex(FPSI);
7236  }
7237  return DAG.getFrameIndex(FPSI, PtrVT);
7238 }
7239 
7240 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7241  SelectionDAG &DAG) const {
7242  MachineFunction &MF = DAG.getMachineFunction();
7243  // Get the inputs.
7244  SDValue Chain = Op.getOperand(0);
7245  SDValue Size = Op.getOperand(1);
7246  SDLoc dl(Op);
7247 
7248  // Get the correct type for pointers.
7249  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7250  // Negate the size.
7251  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7252  DAG.getConstant(0, dl, PtrVT), Size);
7253  // Construct a node for the frame pointer save index.
7254  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7255  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7256  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7257  if (hasInlineStackProbe(MF))
7258  return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7259  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7260 }
7261 
7262 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7263  SelectionDAG &DAG) const {
7264  MachineFunction &MF = DAG.getMachineFunction();
7265 
7266  bool isPPC64 = Subtarget.isPPC64();
7267  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7268 
7269  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7270  return DAG.getFrameIndex(FI, PtrVT);
7271 }
7272 
7273 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7274  SelectionDAG &DAG) const {
7275  SDLoc DL(Op);
7276  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7277  DAG.getVTList(MVT::i32, MVT::Other),
7278  Op.getOperand(0), Op.getOperand(1));
7279 }
7280 
7281 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7282  SelectionDAG &DAG) const {
7283  SDLoc DL(Op);
7284  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7285  Op.getOperand(0), Op.getOperand(1));
7286 }
7287 
7288 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7289  if (Op.getValueType().isVector())
7290  return LowerVectorLoad(Op, DAG);
7291 
7292  assert(Op.getValueType() == MVT::i1 &&
7293  "Custom lowering only for i1 loads");
7294 
7295  // First, load 8 bits into 32 bits, then truncate to 1 bit.
7296 
7297  SDLoc dl(Op);
7298  LoadSDNode *LD = cast<LoadSDNode>(Op);
7299 
7300  SDValue Chain = LD->getChain();
7301  SDValue BasePtr = LD->getBasePtr();
7302  MachineMemOperand *MMO = LD->getMemOperand();
7303 
7304  SDValue NewLD =
7305  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7306  BasePtr, MVT::i8, MMO);
7307  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7308 
7309  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7310  return DAG.getMergeValues(Ops, dl);
7311 }
7312 
7313 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7314  if (Op.getOperand(1).getValueType().isVector())
7315  return LowerVectorStore(Op, DAG);
7316 
7317  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7318  "Custom lowering only for i1 stores");
7319 
7320  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7321 
7322  SDLoc dl(Op);
7323  StoreSDNode *ST = cast<StoreSDNode>(Op);
7324 
7325  SDValue Chain = ST->getChain();
7326  SDValue BasePtr = ST->getBasePtr();
7327  SDValue Value = ST->getValue();
7328  MachineMemOperand *MMO = ST->getMemOperand();
7329 
7330  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7331  Value);
7332  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7333 }
7334 
7335 // FIXME: Remove this once the ANDI glue bug is fixed:
7336 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7337  assert(Op.getValueType() == MVT::i1 &&
7338  "Custom lowering only for i1 results");
7339 
7340  SDLoc DL(Op);
7341  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7342 }
7343 
7344 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7345  SelectionDAG &DAG) const {
7346 
7347  // Implements a vector truncate that fits in a vector register as a shuffle.
7348  // We want to legalize vector truncates down to where the source fits in
7349  // a vector register (and target is therefore smaller than vector register
7350  // size). At that point legalization will try to custom lower the sub-legal
7351  // result and get here - where we can contain the truncate as a single target
7352  // operation.
7353 
7354  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7355  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7356  //
7357  // We will implement it for big-endian ordering as this (where x denotes
7358  // undefined):
7359  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7360  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7361  //
7362  // The same operation in little-endian ordering will be:
7363  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7364  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
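// Concrete mask (editor's illustration) for the v2i16 -> v2i8 example above
// widened to sixteen bytes: SizeMult is 2, so big-endian keeps the odd (LSB)
// bytes with ShuffV = <1, 3, u, ...> while little-endian keeps the even
// bytes with ShuffV = <0, 2, u, ...>.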
7365 
7366  EVT TrgVT = Op.getValueType();
7367  assert(TrgVT.isVector() && "Vector type expected.");
7368  unsigned TrgNumElts = TrgVT.getVectorNumElements();
7369  EVT EltVT = TrgVT.getVectorElementType();
7370  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7371  TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7372  !isPowerOf2_32(EltVT.getSizeInBits()))
7373  return SDValue();
7374 
7375  SDValue N1 = Op.getOperand(0);
7376  EVT SrcVT = N1.getValueType();
7377  unsigned SrcSize = SrcVT.getSizeInBits();
7378  if (SrcSize > 256 ||
7379  !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7380  !isPowerOf2_32(SrcVT.getScalarSizeInBits()))
7381  return SDValue();
7382  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7383  return SDValue();
7384 
7385  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7386  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7387 
7388  SDLoc DL(Op);
7389  SDValue Op1, Op2;
7390  if (SrcSize == 256) {
7391  EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7392  EVT SplitVT =
7393  SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
7394  unsigned SplitNumElts = SplitVT.getVectorNumElements();
7395  Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7396  DAG.getConstant(0, DL, VecIdxTy));
7397  Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7398  DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7399  }
7400  else {
7401  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7402  Op2 = DAG.getUNDEF(WideVT);
7403  }
7404 
7405  // First list the elements we want to keep.
7406  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7407  SmallVector<int, 16> ShuffV;
7408  if (Subtarget.isLittleEndian())
7409  for (unsigned i = 0; i < TrgNumElts; ++i)
7410  ShuffV.push_back(i * SizeMult);
7411  else
7412  for (unsigned i = 1; i <= TrgNumElts; ++i)
7413  ShuffV.push_back(i * SizeMult - 1);
7414 
7415  // Populate the remaining elements with undefs.
7416  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7417  // ShuffV.push_back(i + WideNumElts);
7418  ShuffV.push_back(WideNumElts + 1);
7419 
7420  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7421  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7422  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7423 }
7424 
7425  /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
7426  /// instruction when possible.
7427 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7428  // Not FP, or using SPE? Not a fsel.
7429  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7430  !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7431  return Op;
7432 
7433  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7434 
7435  EVT ResVT = Op.getValueType();
7436  EVT CmpVT = Op.getOperand(0).getValueType();
7437  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7438  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7439  SDLoc dl(Op);
7440  SDNodeFlags Flags = Op.getNode()->getFlags();
7441 
7442  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7443  // presence of infinities.
7444  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7445  switch (CC) {
7446  default:
7447  break;
7448  case ISD::SETOGT:
7449  case ISD::SETGT:
7450  return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7451  case ISD::SETOLT:
7452  case ISD::SETLT:
7453  return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7454  }
7455  }
7456 
7457  // We might be able to do better than this under some circumstances, but in
7458  // general, fsel-based lowering of select is a finite-math-only optimization.
7459  // For more information, see section F.3 of the 2.06 ISA specification.
7460  // With ISA 3.0, the xsmaxcdp/xsmincdp path above avoids this restriction.
7461  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7462  (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7463  return Op;
7464 
7465  // If the RHS of the comparison is a 0.0, we don't need to do the
7466  // subtraction at all.
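// For instance (editor's illustration): with a zero RHS,
//   select_cc f64:a, 0.0, x, y, setge
// lowers directly to (fsel a, x, y), since fsel natively selects its second
// operand when the first is >= 0.0 and its third operand otherwise.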
7467  SDValue Sel1;
7468  if (isFloatingPointZero(RHS))
7469  switch (CC) {
7470  default: break; // SETUO etc aren't handled by fsel.
7471  case ISD::SETNE:
7472  std::swap(TV, FV);
7473  LLVM_FALLTHROUGH;
7474  case ISD::SETEQ:
7475  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7476  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7477  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7478  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7479  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7480  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7481  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7482  case ISD::SETULT:
7483  case ISD::SETLT:
7484  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7485  LLVM_FALLTHROUGH;
7486  case ISD::SETOGE:
7487  case ISD::SETGE:
7488  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7489  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7490  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7491  case ISD::SETUGT:
7492  case ISD::SETGT:
7493  std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
7494  LLVM_FALLTHROUGH;
7495  case ISD::SETOLE:
7496  case ISD::SETLE:
7497  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7498  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7499  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7500  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7501  }
7502 
7503  SDValue Cmp;
7504  switch (CC) {
7505  default: break; // SETUO etc aren't handled by fsel.
7506  case ISD::SETNE:
7507  std::swap(TV, FV);
7508  LLVM_FALLTHROUGH;
7509  case ISD::SETEQ:
7510  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7511  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7512  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7513  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7514  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7515  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7516  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7517  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7518  case ISD::SETULT:
7519  case ISD::SETLT:
7520  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7521  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7522  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7523  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7524  case ISD::SETOGE:
7525  case ISD::SETGE:
7526  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7527  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7528  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7529  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7530  case ISD::SETUGT:
7531  case ISD::SETGT:
7532  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7533  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7534  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7535  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7536  case ISD::SETOLE:
7537  case ISD::SETLE:
7538  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7539  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7540  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7541  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7542  }
7543  return Op;
7544 }
7545 
7546 static unsigned getPPCStrictOpcode(unsigned Opc) {
7547  switch (Opc) {
7548  default:
7549  llvm_unreachable("No strict version of this opcode!");
7550  case PPCISD::FCTIDZ:
7551  return PPCISD::STRICT_FCTIDZ;
7552  case PPCISD::FCTIWZ:
7553  return PPCISD::STRICT_FCTIWZ;
7554  case PPCISD::FCTIDUZ:
7555  return PPCISD::STRICT_FCTIDUZ;
7556  case PPCISD::FCTIWUZ:
7557  return PPCISD::STRICT_FCTIWUZ;
7558  case PPCISD::FCFID:
7559  return PPCISD::STRICT_FCFID;
7560  case PPCISD::FCFIDU:
7561  return PPCISD::STRICT_FCFIDU;
7562  case PPCISD::FCFIDS:
7563  return PPCISD::STRICT_FCFIDS;
7564  case PPCISD::FCFIDUS:
7565  return PPCISD::STRICT_FCFIDUS;
7566  }
7567 }
7568 
7569  static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
7570  const PPCSubtarget &Subtarget) {
7571  SDLoc dl(Op);
7572  bool IsStrict = Op->isStrictFPOpcode();
7573  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7574  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7575 
7576  // TODO: Any other flags to propagate?
7577  SDNodeFlags Flags;
7578  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7579 
7580  // For strict nodes, source is the second operand.
7581  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7582  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7584  if (Src.getValueType() == MVT::f32) {
7585  if (IsStrict) {
7586  Src =
7587  DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
7588  DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
7589  Chain = Src.getValue(1);
7590  } else
7591  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7592  }
7593  SDValue Conv;
7594  unsigned Opc = ISD::DELETED_NODE;
7595  switch (Op.getSimpleValueType().SimpleTy) {
7596  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7597  case MVT::i32:
7598  Opc = IsSigned ? PPCISD::FCTIWZ
7599  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
7600  break;
7601  case MVT::i64:
7602  assert((IsSigned || Subtarget.hasFPCVT()) &&
7603  "i64 FP_TO_UINT is supported only with FPCVT");
7604  Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
7605  }
7606  if (IsStrict) {
7607  Opc = getPPCStrictOpcode(Opc);
7608  Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
7609  {Chain, Src}, Flags);
7610  } else {
7611  Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
7612  }
7613  return Conv;
7614 }
7615 
7616 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7617  SelectionDAG &DAG,
7618  const SDLoc &dl) const {
7619  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
7620  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7621  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7622  bool IsStrict = Op->isStrictFPOpcode();
7623 
7624  // Convert the FP value to an int value through memory.
7625  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7626  (IsSigned || Subtarget.hasFPCVT());
7627  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7628  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7629  MachinePointerInfo MPI =
7630  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7631 
7632  // Emit a store to the stack slot.
7633  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
7634  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
7635  if (i32Stack) {
7636  MachineFunction &MF = DAG.getMachineFunction();
7637  Alignment = Align(4);
7638  MachineMemOperand *MMO =
7639  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
7640  SDValue Ops[] = { Chain, Tmp, FIPtr };
7641  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7642  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7643  } else
7644  Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
7645 
7646  // Result is a load from the stack slot. If loading 4 bytes, make sure to
7647  // add in a bias on big endian.
7648  if (Op.getValueType() == MVT::i32 && !i32Stack) {
7649  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7650  DAG.getConstant(4, dl, FIPtr.getValueType()));
7651  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7652  }
7653 
7654  RLI.Chain = Chain;
7655  RLI.Ptr = FIPtr;
7656  RLI.MPI = MPI;
7657  RLI.Alignment = Alignment;
7658 }
7659 
7660 /// Custom lowers floating point to integer conversions to use
7661 /// the direct move instructions available in ISA 2.07 to avoid the
7662 /// need for load/store combinations.
7663 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7664  SelectionDAG &DAG,
7665  const SDLoc &dl) const {
7666  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
7667  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
7668  if (Op->isStrictFPOpcode())
7669  return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
7670  else
7671  return Mov;
7672 }
7673 
7674 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7675  const SDLoc &dl) const {
7676  bool IsStrict = Op->isStrictFPOpcode();
7677  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7678  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7679  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7680  EVT SrcVT = Src.getValueType();
7681  EVT DstVT = Op.getValueType();
7682 
7683  // FP to INT conversions are legal for f128.
7684  if (SrcVT == MVT::f128)
7685  return Subtarget.hasP9Vector() ? Op : SDValue();
7686 
7687  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7688  // PPC (the libcall is not available).
7689  if (SrcVT == MVT::ppcf128) {
7690  if (DstVT == MVT::i32) {
7691  // TODO: Conservatively pass only nofpexcept flag here. Need to check and
7692  // set other fast-math flags to FP operations in both strict and
7693  // non-strict cases. (FP_TO_SINT, FSUB)
7694  SDNodeFlags Flags;
7695  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7696 
7697  if (IsSigned) {
7698  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7699  DAG.getIntPtrConstant(0, dl));
7700  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7701  DAG.getIntPtrConstant(1, dl));
7702 
7703  // Add the two halves of the long double in round-to-zero mode, and use
7704  // a smaller FP_TO_SINT.
7705  if (IsStrict) {
7706  SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
7707  DAG.getVTList(MVT::f64, MVT::Other),
7708  {Op.getOperand(0), Lo, Hi}, Flags);
7709  return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7710  DAG.getVTList(MVT::i32, MVT::Other),
7711  {Res.getValue(1), Res}, Flags);
7712  } else {
7713  SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7714  return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7715  }
7716  } else {
7717  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7718  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7719  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7720  SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
7721  if (IsStrict) {
7722  // Sel = Src < 0x80000000
7723  // FltOfs = select Sel, 0.0, 0x80000000
7724  // IntOfs = select Sel, 0, 0x80000000
7725  // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7726  SDValue Chain = Op.getOperand(0);
7727  EVT SetCCVT =
7728  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7729  EVT DstSetCCVT =
7730  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
7731  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7732  Chain, true);
7733  Chain = Sel.getValue(1);
7734 
7735  SDValue FltOfs = DAG.getSelect(
7736  dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7737  Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7738 
7739  SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
7740  DAG.getVTList(SrcVT, MVT::Other),
7741  {Chain, Src, FltOfs}, Flags);
7742  Chain = Val.getValue(1);
7743  SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7744  DAG.getVTList(DstVT, MVT::Other),
7745  {Chain, Val}, Flags);
7746  Chain = SInt.getValue(1);
7747  SDValue IntOfs = DAG.getSelect(
7748  dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
7749  SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
7750  return DAG.getMergeValues({Result, Chain}, dl);
7751  } else {
7752  // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7753  // FIXME: generated code sucks.
7754  SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
7755  True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7756  True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
7757  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
7758  return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
7759  }
7760  }
7761  }
7762 
7763  return SDValue();
7764  }
7765 
7766  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7767  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7768 
7769  ReuseLoadInfo RLI;
7770  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7771 
7772  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7773  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7774 }
7775 
7776 // We're trying to insert a regular store, S, and then a load, L. If the
7777 // incoming value, O, is a load, we might just be able to have our load use the
7778 // address used by O. However, we don't know if anything else will store to
7779 // that address before we can load from it. To prevent this situation, we need
7780 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7781 // the same chain operand as O, we create a token factor from the chain results
7782 // of O and L, and we replace all uses of O's chain result with that token
7783 // factor (see spliceIntoChain below for this last part).
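// Chain shape, sketched (editor's illustration):
//   before:  users of O's out-chain --> O.outchain
//   after:   users of O's out-chain --> TokenFactor(O.outchain, L.outchain)
// so anything that was ordered after O is now also ordered after L.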
7784 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7785  ReuseLoadInfo &RLI,
7786  SelectionDAG &DAG,
7787  ISD::LoadExtType ET) const {
7788  // Conservatively skip reusing for constrained FP nodes.
7789  if (Op->isStrictFPOpcode())
7790  return false;
7791 
7792  SDLoc dl(Op);
7793  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
7794  (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
7795  if (ET == ISD::NON_EXTLOAD &&
7796  (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
7797  isOperationLegalOrCustom(Op.getOpcode(),
7798  Op.getOperand(0).getValueType())) {
7799 
7800  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7801  return true;
7802  }
7803 
7804  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7805  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7806  LD->isNonTemporal())
7807  return false;
7808  if (LD->getMemoryVT() != MemVT)
7809  return false;
7810 
7811  // If the result of the load is an illegal type, then we can't build a
7812  // valid chain for reuse, since the legalised loads and the token factor
7813  // node that ties them together use a different output chain than the
7814  // illegal load.
7815  if (!isTypeLegal(LD->getValueType(0)))
7816  return false;
7817 
7818  RLI.Ptr = LD->getBasePtr();
7819  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7820  assert(LD->getAddressingMode() == ISD::PRE_INC &&
7821  "Non-pre-inc AM on PPC?");
7822  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7823  LD->getOffset());
7824  }
7825 
7826  RLI.Chain = LD->getChain();
7827  RLI.MPI = LD->getPointerInfo();
7828  RLI.IsDereferenceable = LD->isDereferenceable();
7829  RLI.IsInvariant = LD->isInvariant();
7830  RLI.Alignment = LD->getAlign();
7831  RLI.AAInfo = LD->getAAInfo();
7832  RLI.Ranges = LD->getRanges();
7833 
7834  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7835  return true;
7836 }
7837 
7838 // Given the head of the old chain, ResChain, insert a token factor containing
7839 // it and NewResChain, and make users of ResChain now be users of that token
7840 // factor.
7841 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7842 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7843  SDValue NewResChain,
7844  SelectionDAG &DAG) const {
7845  if (!ResChain)
7846  return;
7847 
7848  SDLoc dl(NewResChain);
7849 
7850  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ResChain,
7851  NewResChain, DAG.getUNDEF(MVT::Other));
7852  assert(TF.getNode() != NewResChain.getNode() &&
7853  "A new TF really is required here");
7854 
7855  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7856  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7857 }
7858 
7859  /// Analyze the profitability of a direct move:
7860  /// prefer a float load over an int load plus a direct move
7861  /// when there is no integer use of the loaded int value.
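/// For example (editor's illustration): in
///   %v = load i32 ; %f = sitofp i32 %v to double ; %u = add i32 %v, 1
/// the extra integer use %u makes the int-load-plus-direct-move form
/// profitable, while with only the sitofp user a float load is preferred.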
7862 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7863  SDNode *Origin = Op.getOperand(0).getNode();
7864  if (Origin->getOpcode() != ISD::LOAD)
7865  return true;
7866 
7867  // If there is no LXSIBZX/LXSIHZX, like Power8,
7868  // prefer direct move if the memory size is 1 or 2 bytes.
7869  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7870  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7871  return true;
7872 
7873  for (SDNode::use_iterator UI = Origin->use_begin(),
7874  UE = Origin->use_end();
7875  UI != UE; ++UI) {
7876 
7877  // Only look at the users of the loaded value.
7878  if (UI.getUse().get().getResNo() != 0)
7879  continue;
7880 
7881  if (UI->getOpcode() != ISD::SINT_TO_FP &&
7882  UI->getOpcode() != ISD::UINT_TO_FP &&
7883  UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
7884  UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
7885  return true;
7886  }
7887 
7888  return false;
7889 }
7890 
7891  static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
7892  const PPCSubtarget &Subtarget,
7893  SDValue Chain = SDValue()) {
7894  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7895  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7896  SDLoc dl(Op);
7897 
7898  // TODO: Any other flags to propagate?
7899  SDNodeFlags Flags;
7900  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7901 
7902  // If we have FCFIDS, then use it when converting to single-precision.
7903  // Otherwise, convert to double-precision and then round.
7904  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
7905  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
7906  : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
7907  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
7908  if (Op->isStrictFPOpcode()) {
7909  if (!Chain)
7910  Chain = Op.getOperand(0);
7911  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
7912  DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
7913  } else
7914  return DAG.getNode(ConvOpc, dl, ConvTy, Src);
7915 }
7916 
7917 /// Custom lowers integer to floating point conversions to use
7918 /// the direct move instructions available in ISA 2.07 to avoid the
7919 /// need for load/store combinations.
7920 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7921  SelectionDAG &DAG,
7922  const SDLoc &dl) const {
7923  assert((Op.getValueType() == MVT::f32 ||
7924  Op.getValueType() == MVT::f64) &&
7925  "Invalid floating point type as target of conversion");
7926  assert(Subtarget.hasFPCVT() &&
7927  "Int to FP conversions with direct moves require FPCVT");
7928  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7929  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7930  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
7931  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7932  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
7933  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
7934  return convertIntToFP(Op, Mov, DAG, Subtarget);
7935 }
7936 
7937 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7938 
7939  EVT VecVT = Vec.getValueType();
7940  assert(VecVT.isVector() && "Expected a vector type.");
7941  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7942 
7943  EVT EltVT = VecVT.getVectorElementType();
7944  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7945  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7946 
7947  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7948  SmallVector<SDValue, 16> Ops(NumConcat);
7949  Ops[0] = Vec;
7950  SDValue UndefVec = DAG.getUNDEF(VecVT);
7951  for (unsigned i = 1; i < NumConcat; ++i)
7952  Ops[i] = UndefVec;
7953 
7954  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7955 }
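// Usage sketch (editor's illustration): widening a v2i32 source produces
//   (v4i32 concat_vectors v2i32:Vec, v2i32:undef)
// i.e. the original elements in lanes 0-1 and undef in lanes 2-3, filling a
// full 128-bit register.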
7956 
7957 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7958  const SDLoc &dl) const {
7959  bool IsStrict = Op->isStrictFPOpcode();
7960  unsigned Opc = Op.getOpcode();
7961  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7962  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
7963  Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
7964  "Unexpected conversion type");
7965  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7966  "Supports conversions to v2f64/v4f32 only.");
7967 
7968  // TODO: Any other flags to propagate?
7969  SDNodeFlags Flags;
7970  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7971 
7972  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
7973  bool FourEltRes = Op.getValueType() == MVT::v4f32;
7974 
7975  SDValue Wide = widenVec(DAG, Src, dl);
7976  EVT WideVT = Wide.getValueType();
7977  unsigned WideNumElts = WideVT.getVectorNumElements();
7978  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7979 
7980  SmallVector<int, 16> ShuffV;
7981  for (unsigned i = 0; i < WideNumElts; ++i)
7982  ShuffV.push_back(i + WideNumElts);
7983 
7984  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7985  int SaveElts = FourEltRes ? 4 : 2;
7986  if (Subtarget.isLittleEndian())
7987  for (int i = 0; i < SaveElts; i++)
7988  ShuffV[i * Stride] = i;
7989  else
7990  for (int i = 1; i <= SaveElts; i++)
7991  ShuffV[i * Stride - 1] = i - 1;
7992 
7993  SDValue ShuffleSrc2 =
7994  SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7995  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7996 
7997  SDValue Extend;
7998  if (SignedConv) {
7999  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8000  EVT ExtVT = Src.getValueType();
8001  if (Subtarget.hasP9Altivec())
8002  ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8003  IntermediateVT.getVectorNumElements());
8004 
8005  Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8006  DAG.getValueType(ExtVT));
8007  } else
8008  Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8009 
8010  if (IsStrict)
8011  return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8012  {Op.getOperand(0), Extend}, Flags);
8013 
8014  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8015 }
8016 
8017 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8018  SelectionDAG &DAG) const {
8019  SDLoc dl(Op);
8020  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8021  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8022  bool IsStrict = Op->isStrictFPOpcode();
8023  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8024  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8025 
8026  // TODO: Any other flags to propagate?
8027  SDNodeFlags Flags;
8028  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8029 
8030  EVT InVT = Src.getValueType();
8031  EVT OutVT = Op.getValueType();
8032  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8033  isOperationCustom(Op.getOpcode(), InVT))
8034  return LowerINT_TO_FPVector(Op, DAG, dl);
8035 
8036  // Conversions to f128 are legal.
8037  if (Op.getValueType() == MVT::f128)
8038  return Subtarget.hasP9Vector() ? Op : SDValue();
8039 
8040  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8041  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8042  return SDValue();
8043 
8044  if (Src.getValueType() == MVT::i1) {
8045  SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8046  DAG.getConstantFP(1.0, dl, Op.getValueType()),
8047  DAG.getConstantFP(0.0, dl, Op.getValueType()));
8048  if (IsStrict)
8049  return DAG.getMergeValues({Sel, Chain}, dl);
8050  else
8051  return Sel;
8052  }
8053 
8054  // If we have direct moves, we can do all the conversion and skip the
8055  // store/load; however, without FPCVT we can't do most conversions.
8056  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8057  Subtarget.isPPC64() && Subtarget.hasFPCVT())
8058  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8059 
8060  assert((IsSigned || Subtarget.hasFPCVT()) &&
8061  "UINT_TO_FP is supported only with FPCVT");
8062 
8063  if (Src.getValueType() == MVT::i64) {
8064  SDValue SINT = Src;
8065  // When converting to single-precision, we actually need to convert
8066  // to double-precision first and then round to single-precision.
8067  // To avoid double-rounding effects during that operation, we have
8068  // to prepare the input operand. Bits that might be truncated when
8069  // converting to double-precision are replaced by a bit that won't
8070  // be lost at this stage, but is below the single-precision rounding
8071  // position.
8072  //
8073  // However, if -enable-unsafe-fp-math is in effect, accept double
8074  // rounding to avoid the extra overhead.
8075  if (Op.getValueType() == MVT::f32 &&
8076  !Subtarget.hasFPCVT() &&
8077  !DAG.getTarget().Options.UnsafeFPMath) {
8078 
8079  // Twiddle input to make sure the low 11 bits are zero. (If this
8080  // is the case, we are guaranteed the value will fit into the 53 bit
8081  // mantissa of an IEEE double-precision value without rounding.)
8082  // If any of those low 11 bits were not zero originally, make sure
8083  // bit 12 (value 2048) is set instead, so that the final rounding
8084  // to single-precision gets the correct result.
8085  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8086  SINT, DAG.getConstant(2047, dl, MVT::i64));
8087  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8088  Round, DAG.getConstant(2047, dl, MVT::i64));
8089  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8090  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8091  Round, DAG.getConstant(-2048, dl, MVT::i64));
8092 
8093  // However, we cannot use that value unconditionally: if the magnitude
8094  // of the input value is small, the bit-twiddling we did above might
8095  // end up visibly changing the output. Fortunately, in that case, we
8096  // don't need to twiddle bits since the original input will convert
8097  // exactly to double-precision floating-point already. Therefore,
8098  // construct a conditional to use the original value if the top 11
8099  // bits are all sign-bit copies, and use the rounded value computed
8100  // above otherwise.
8101  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8102  SINT, DAG.getConstant(53, dl, MVT::i32));
8103  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8104  Cond, DAG.getConstant(1, dl, MVT::i64));
8105  Cond = DAG.getSetCC(
8106  dl,
8107  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8108  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8109 
8110  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8111  }
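// Editor's note: a minimal scalar sketch (not part of the original file) of
// the twiddle above, assuming two's-complement int64_t and arithmetic right
// shift; the helper name is illustrative only:
//
//   int64_t prepareForSingleRounding(int64_t X) {
//     int64_t R = (X & 2047) + 2047;   // bit 11 set iff low 11 bits != 0
//     R = (R | X) & ~int64_t(2047);    // merge, then clear low 11 bits
//     uint64_t Top = (uint64_t)((X >> 53) + 1);
//     return Top > 1 ? R : X;          // keep X if it converts exactly
//   }
//
// For example, X = 0x20000000000005 has nonzero low bits, so R replaces them
// with the single sticky bit 0x800 before the f64 -> f32 round.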
8112 
8113  ReuseLoadInfo RLI;
8114  SDValue Bits;
8115 
8116  MachineFunction &MF = DAG.getMachineFunction();
8117  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8118  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8119  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8120  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8121  } else if (Subtarget.hasLFIWAX() &&
8122  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8123  MachineMemOperand *MMO =
8124  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8125  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8126  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8127  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8128  DAG.getVTList(MVT::f64, MVT::Other),
8129  Ops, MVT::i32, MMO);
8130  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8131  } else if (Subtarget.hasFPCVT() &&
8132  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8133  MachineMemOperand *MMO =
8134  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8135  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8136  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8137  Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8138  DAG.getVTList(MVT::f64, MVT::Other),
8139  Ops, MVT::i32, MMO);
8140  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8141  } else if (((Subtarget.hasLFIWAX() &&
8142  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8143  (Subtarget.hasFPCVT() &&
8144  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8145  SINT.getOperand(0).getValueType() == MVT::i32) {
8146  MachineFrameInfo &MFI = MF.getFrameInfo();
8147  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8148 
8149  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8150  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8151 
8152  SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8153  MachinePointerInfo::getFixedStack(
8154  DAG.getMachineFunction(), FrameIdx));
8155  Chain = Store;
8156 
8157  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8158  "Expected an i32 store");
8159 
8160  RLI.Ptr = FIdx;
8161  RLI.Chain = Chain;
8162  RLI.MPI =
8163  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8164  RLI.Alignment = Align(4);
8165 
8166  MachineMemOperand *MMO =
8167  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8168  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8169  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8170  Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8171  PPCISD::LFIWZX : PPCISD::LFIWAX,
8172  dl, DAG.getVTList(MVT::f64, MVT::Other),
8173  Ops, MVT::i32, MMO);
8174  Chain = Bits.getValue(1);
8175  } else
8176  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8177 
8178  SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8179  if (IsStrict)
8180  Chain = FP.getValue(1);
8181 
8182  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8183  if (IsStrict)
8184  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8185  DAG.getVTList(MVT::f32, MVT::Other),
8186  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8187  else
8188  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8189  DAG.getIntPtrConstant(0, dl));
8190  }
8191  return FP;
8192  }
8193 
8194  assert(Src.getValueType() == MVT::i32 &&
8195  "Unhandled INT_TO_FP type in custom expander!");
8196  // Since we only generate this in 64-bit mode, we can take advantage of
8197  // 64-bit registers. In particular, sign extend the input value into the
8198  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
8199  // then lfd it and fcfid it.
8200  MachineFunction &MF = DAG.getMachineFunction();
8201  MachineFrameInfo &MFI = MF.getFrameInfo();
8202  EVT PtrVT = getPointerTy(MF.getDataLayout());
8203 
8204  SDValue Ld;
8205  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8206  ReuseLoadInfo RLI;
8207  bool ReusingLoad;
8208  if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8209  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8210  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8211 
8212  SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8213  MachinePointerInfo::getFixedStack(
8214  DAG.getMachineFunction(), FrameIdx));
8215  Chain = Store;
8216 
8217  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8218  "Expected an i32 store");
8219 
8220  RLI.Ptr = FIdx;
8221  RLI.Chain = Chain;
8222  RLI.MPI =
8223  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8224  RLI.Alignment = Align(4);
8225  }
8226 
8227  MachineMemOperand *MMO =
8228  MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8229  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8230  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8231  Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8232  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8233  MVT::i32, MMO);
8234  Chain = Ld.getValue(1);
8235  if (ReusingLoad)
8236  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8237  } else {
8238  assert(Subtarget.isPPC64() &&
8239  "i32->FP without LFIWAX supported only on PPC64");
8240 
8241  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8242  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8243 
8244  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8245 
8246  // STD the extended value into the stack slot.
8247  SDValue Store = DAG.getStore(
8248  Chain, dl, Ext64, FIdx,
8249  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8250  Chain = Store;
8251 
8252  // Load the value as a double.
8253  Ld = DAG.getLoad(
8254  MVT::f64, dl, Chain, FIdx,
8255  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8256  Chain = Ld.getValue(1);
8257  }
8258 
8259  // FCFID it and return it.
8260  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8261  if (IsStrict)
8262  Chain = FP.getValue(1);
8263  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8264  if (IsStrict)
8265  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8266  DAG.getVTList(MVT::f32, MVT::Other),
8267  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8268  else
8269  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8270  DAG.getIntPtrConstant(0, dl));
8271  }
8272  return FP;
8273 }
8274 
8275 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8276  SelectionDAG &DAG) const {
8277  SDLoc dl(Op);
8278  /*
8279  The rounding mode is in bits 30:31 of FPSCR, and has the following
8280  settings:
8281  00 Round to nearest
8282  01 Round to 0
8283  10 Round to +inf
8284  11 Round to -inf
8285 
8286  FLT_ROUNDS, on the other hand, expects the following:
8287  -1 Undefined
8288  0 Round to 0
8289  1 Round to nearest
8290  2 Round to +inf
8291  3 Round to -inf
8292 
8293  To perform the conversion, we do:
8294  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8295  */
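// Editor's note: the mapping, worked through as a standalone sketch (not in
// the original source), where RN stands for the two-bit FPSCR rounding-mode
// field and the helper name is illustrative:
//
//   unsigned toFltRounds(unsigned RN) {     // RN in [0, 3]
//     return (RN & 3) ^ ((~RN & 3) >> 1);   // 0->1, 1->0, 2->2, 3->3
//   }
//
// e.g. RN = 0 (round to nearest) gives 0 ^ (3 >> 1) = 1, the FLT_ROUNDS
// encoding for round-to-nearest.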
8296 
8297  MachineFunction &MF = DAG.getMachineFunction();
8298  EVT VT = Op.getValueType();
8299  EVT PtrVT = getPointerTy(MF.getDataLayout());
8300 
8301  // Save FP Control Word to register
8302  SDValue Chain = Op.getOperand(0);
8303  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8304  Chain = MFFS.getValue(1);
8305 
8306  SDValue CWD;
8307  if (isTypeLegal(MVT::i64)) {
8308  CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8309  DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8310  } else {
8311  // Save FP register to stack slot
8312  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8313  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8314  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8315 
8316  // Load FP Control Word from low 32 bits of stack slot.
8318  "Stack slot adjustment is valid only on big endian subtargets!");
8319  SDValue Four = DAG.getConstant(4, dl, PtrVT);
8320  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8321  CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8322  Chain = CWD.getValue(1);
8323  }
8324 
8325  // Transform as necessary
8326  SDValue CWD1 =
8327  DAG.getNode(ISD::AND, dl, MVT::i32,
8328  CWD, DAG.getConstant(3, dl, MVT::i32));
8329  SDValue CWD2 =
8330  DAG.getNode(ISD::SRL, dl, MVT::i32,
8331  DAG.getNode(ISD::AND, dl, MVT::i32,
8332  DAG.getNode(ISD::XOR, dl, MVT::i32,
8333  CWD, DAG.getConstant(3, dl, MVT::i32)),
8334  DAG.getConstant(3, dl, MVT::i32)),
8335  DAG.getConstant(1, dl, MVT::i32));
8336 
8337  SDValue RetVal =
8338  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8339 
8340  RetVal =
8341  DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8342  dl, VT, RetVal);
8343 
8344  return DAG.getMergeValues({RetVal, Chain}, dl);
8345 }
8346 
8347 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8348  EVT VT = Op.getValueType();
8349  unsigned BitWidth = VT.getSizeInBits();
8350  SDLoc dl(Op);
8351  assert(Op.getNumOperands() == 3 &&
8352  VT == Op.getOperand(1).getValueType() &&
8353  "Unexpected SHL!");
8354 
8355  // Expand into a bunch of logical ops. Note that these ops
8356  // depend on the PPC behavior for oversized shift amounts.
8357  SDValue Lo = Op.getOperand(0);
8358  SDValue Hi = Op.getOperand(1);
8359  SDValue Amt = Op.getOperand(2);
8360  EVT AmtVT = Amt.getValueType();
8361 
8362  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8363  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8364  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8365  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8366  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8367  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8368  DAG.getConstant(-BitWidth, dl, AmtVT));
8369  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8370  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8371  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8372  SDValue OutOps[] = { OutLo, OutHi };
8373  return DAG.getMergeValues(OutOps, dl);
8374 }
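// Editor's note: a scalar model (not part of the original file) of the
// expansion above for a 32-bit-element pair; the guards stand in for the PPC
// shift semantics the comment relies on (amounts of 32..63 yield 0), and the
// helper name is illustrative only:
LLVM_ATTRIBUTE_UNUSED static std::pair<uint32_t, uint32_t>
modelSHLParts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  auto SHL = [](uint32_t V, unsigned S) { return S < 32 ? V << S : 0u; };
  auto SRL = [](uint32_t V, unsigned S) { return S < 32 ? V >> S : 0u; };
  // OutHi mirrors Tmp4 | Tmp6; OutLo mirrors PPCISD::SHL of Lo by Amt.
  // Unsigned wrap-around of 32 - Amt and Amt - 32 matches the i32 DAG math.
  uint32_t OutHi = (SHL(Hi, Amt) | SRL(Lo, 32 - Amt)) | SHL(Lo, Amt - 32);
  uint32_t OutLo = SHL(Lo, Amt);
  return {OutLo, OutHi};
}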
8375 
8376 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8377  EVT VT = Op.getValueType();
8378  SDLoc dl(Op);
8379  unsigned BitWidth = VT.getSizeInBits();
8380  assert(Op.getNumOperands() == 3 &&
8381  VT == Op.getOperand(1).getValueType() &&
8382  "Unexpected SRL!");
8383 
8384  // Expand into a bunch of logical ops. Note that these ops
8385  // depend on the PPC behavior for oversized shift amounts.
8386  SDValue Lo = Op.getOperand(0);
8387  SDValue Hi = Op.getOperand(1);
8388  SDValue Amt = Op.getOperand(2);
8389  EVT AmtVT = Amt.getValueType();
8390 
8391  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8392  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8393  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8394  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8395  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8396  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8397  DAG.getConstant(-BitWidth, dl, AmtVT));
8398  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8399  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8400  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8401  SDValue OutOps[] = { OutLo, OutHi };
8402  return DAG.getMergeValues(OutOps, dl);
8403 }
8404 
8405 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8406  SDLoc dl(Op);
8407  EVT VT = Op.getValueType();
8408  unsigned BitWidth = VT.getSizeInBits();
8409  assert(Op.getNumOperands() == 3 &&
8410  VT == Op.getOperand(1).getValueType() &&
8411  "Unexpected SRA!");
8412 
8413  // Expand into a bunch of logical ops, followed by a select_cc.
8414  SDValue Lo = Op.getOperand(0);
8415  SDValue Hi = Op.getOperand(1);
8416  SDValue Amt = Op.getOperand(2);
8417  EVT AmtVT = Amt.getValueType();
8418 
8419  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8420  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8421  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8422  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8423  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8424  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8425  DAG.getConstant(-BitWidth, dl, AmtVT));
8426  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8427  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8428  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8429  Tmp4, Tmp6, ISD::SETLE);
8430  SDValue OutOps[] = { OutLo, OutHi };
8431  return DAG.getMergeValues(OutOps, dl);
8432 }
8433 
8434 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8435  SelectionDAG &DAG) const {
8436  SDLoc dl(Op);
8437  EVT VT = Op.getValueType();
8438  unsigned BitWidth = VT.getSizeInBits();
8439 
8440  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8441  SDValue X = Op.getOperand(0);
8442  SDValue Y = Op.getOperand(1);
8443  SDValue Z = Op.getOperand(2);
8444  EVT AmtVT = Z.getValueType();
8445 
8446  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8447  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8448  // This is simpler than TargetLowering::expandFunnelShift because we can rely
8449  // on PowerPC shift by BW being well defined.
8450  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8451  DAG.getConstant(BitWidth - 1, dl, AmtVT));
8452  SDValue SubZ =
8453  DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8454  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8455  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8456  return DAG.getNode(ISD::OR, dl, VT, X, Y);
8457 }
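// Editor's note: a scalar model (not part of the original file) of the
// funnel-shift lowering above for i64 fshl; the conditional stands in for
// PPC's well-defined shift by BW, and the helper name is illustrative:
LLVM_ATTRIBUTE_UNUSED static uint64_t modelFSHL64(uint64_t X, uint64_t Y,
                                                  unsigned Z) {
  Z &= 63;                                   // Z % BW
  uint64_t HiPart = X << Z;                  // X << (Z % BW)
  uint64_t LoPart = Z ? (Y >> (64 - Z)) : 0; // PPC srl by 64 yields 0
  return HiPart | LoPart;
}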
8458 
8459 //===----------------------------------------------------------------------===//
8460 // Vector related lowering.
8461 //
8462 
8463 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8464 /// element size of SplatSize. Cast the result to VT.
8465 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8466  SelectionDAG &DAG, const SDLoc &dl) {
8467  static const MVT VTys[] = { // canonical VT to use for each size.
8468  MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8469  };
8470 
8471  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8472 
8473  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8474  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8475  SplatSize = 1;
8476  Val = 0xFF;
8477  }
8478 
8479  EVT CanonicalVT = VTys[SplatSize-1];
8480 
8481  // Build a canonical splat for this value.
8482  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8483 }
8484 
8485 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8486 /// specified intrinsic ID.
8487 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8488  const SDLoc &dl, EVT DestVT = MVT::Other) {
8489  if (DestVT == MVT::Other) DestVT = Op.getValueType();
8490  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8491  DAG.getConstant(IID, dl, MVT::i32), Op);
8492 }
8493 
8494 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8495 /// specified intrinsic ID.
8496 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8497  SelectionDAG &DAG, const SDLoc &dl,
8498  EVT DestVT = MVT::Other) {
8499  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8500  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8501  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8502 }
8503 
8504 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8505 /// specified intrinsic ID.
8506 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8507  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8508  EVT DestVT = MVT::Other) {
8509  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8510  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8511  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8512 }
8513 
8514 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8515 /// amount. The result has the specified value type.
8516 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8517  SelectionDAG &DAG, const SDLoc &dl) {
8518  // Force LHS/RHS to be the right type.
8519  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8520  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8521 
8522  int Ops[16];
8523  for (unsigned i = 0; i != 16; ++i)
8524  Ops[i] = i + Amt;
8525  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8526  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8527 }
8528 
8529 /// Do we have an efficient pattern in a .td file for this node?
8530 ///
8531 /// \param V - pointer to the BuildVectorSDNode being matched
8532 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8533 ///
8534 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8535 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8536 /// the opposite is true (expansion is beneficial) are:
8537 /// - The node builds a vector out of integers that are not 32 or 64-bits
8538 /// - The node builds a vector out of constants
8539 /// - The node is a "load-and-splat"
8540 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8541 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8542  bool HasDirectMove,
8543  bool HasP8Vector) {
8544  EVT VecVT = V->getValueType(0);
8545  bool RightType = VecVT == MVT::v2f64 ||
8546  (HasP8Vector && VecVT == MVT::v4f32) ||
8547  (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8548  if (!RightType)
8549  return false;
8550 
8551  bool IsSplat = true;
8552  bool IsLoad = false;
8553  SDValue Op0 = V->getOperand(0);
8554 
8555  // This function is called in a block that confirms the node is not a constant
8556  // splat. So a constant BUILD_VECTOR here means the vector is built out of
8557  // different constants.
8558  if (V->isConstant())
8559  return false;
8560  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8561  if (V->getOperand(i).isUndef())
8562  return false;
8563  // We want to expand nodes that represent load-and-splat even if the
8564  // loaded value is a floating point truncation or conversion to int.
8565  if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8566  (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8567  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8568  (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8569  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8570  (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8571  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8572  IsLoad = true;
8573  // If the operands are different or the input is not a load and has more
8574  // uses than just this BV node, then it isn't a splat.
8575  if (V->getOperand(i) != Op0 ||
8576  (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8577  IsSplat = false;
8578  }
8579  return !(IsSplat && IsLoad);
8580 }
8581 
8582 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8583 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8584 
8585  SDLoc dl(Op);
8586  SDValue Op0 = Op->getOperand(0);
8587 
8588  if ((Op.getValueType() != MVT::f128) ||
8589  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8590  (Op0.getOperand(0).getValueType() != MVT::i64) ||
8591  (Op0.getOperand(1).getValueType() != MVT::i64))
8592  return SDValue();
8593 
8594  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8595  Op0.getOperand(1));
8596 }
8597 
8598 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8599  const SDValue *InputLoad = &Op;
8600  if (InputLoad->getOpcode() == ISD::BITCAST)
8601  InputLoad = &InputLoad->getOperand(0);
8602  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8603  InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
8604  IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
8605  InputLoad = &InputLoad->getOperand(0);
8606  }
8607  if (InputLoad->getOpcode() != ISD::LOAD)
8608  return nullptr;
8609  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8610  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8611 }
8612 
8613 // Convert the argument APFloat to a single precision APFloat if there is no
8614 // loss in information during the conversion to single precision APFloat and the
8615 // resulting number is not a denormal number. Return true if successful.
8616 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
8617  APFloat APFloatToConvert = ArgAPFloat;
8618  bool LosesInfo = true;
8619  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
8620  &LosesInfo);
8621  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
8622  if (Success)
8623  ArgAPFloat = APFloatToConvert;
8624  return Success;
8625 }
8626 
8627 // Bitcast the argument APInt to a double and convert it to a single precision
8628 // APFloat, bitcast the APFloat to an APInt and assign it to the original
8629 // argument if there is no loss in information during the conversion from
8630 // double to single precision APFloat and the resulting number is not a denormal
8631 // number. Return true if successful.
8632 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
8633  double DpValue = ArgAPInt.bitsToDouble();
8634  APFloat APFloatDp(DpValue);
8635  bool Success = convertToNonDenormSingle(APFloatDp);
8636  if (Success)
8637  ArgAPInt = APFloatDp.bitcastToAPInt();
8638  return Success;
8639 }
8640 
8641 // If this is a case we can't handle, return null and let the default
8642 // expansion code take care of it. If we CAN select this case, and if it
8643 // selects to a single instruction, return Op. Otherwise, if we can codegen
8644 // this case more efficiently than a constant pool load, lower it to the
8645 // sequence of ops that should be used.
8646 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8647  SelectionDAG &DAG) const {
8648  SDLoc dl(Op);
8649  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8650  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8651 
8652  // Check if this is a splat of a constant value.
8653  APInt APSplatBits, APSplatUndef;
8654  unsigned SplatBitSize;
8655  bool HasAnyUndefs;
8656  bool BVNIsConstantSplat =
8657  BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8658  HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8659 
8660  // If it is a splat of a double, check if we can shrink it to a 32 bit
8661  // non-denormal float which when converted back to double gives us the same
8662  // double. This is to exploit the XXSPLTIDP instruction.
8663  // If we lose precision, we use XXSPLTI32DX.
8664  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
8665  Subtarget.hasPrefixInstrs()) {
8666  // Check the type first to short-circuit so we don't modify APSplatBits if
8667  // this block isn't executed.
8668  if ((Op->getValueType(0) == MVT::v2f64) &&
8669  convertToNonDenormSingle(APSplatBits)) {
8670  SDValue SplatNode = DAG.getNode(
8671  PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
8672  DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8673  return DAG.getBitcast(Op.getValueType(), SplatNode);
8674  } else {
8675  // We may lose precision, so we have to use XXSPLTI32DX.
8676 
8677  uint32_t Hi =
8678  (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
8679  uint32_t Lo =
8680  (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
8681  SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
8682 
8683  if (!Hi || !Lo)
8684  // If either load is 0, then we should generate XXLXOR to set to 0.
8685  SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
8686 
8687  if (Hi)
8688  SplatNode = DAG.getNode(
8689  PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8690  DAG.getTargetConstant(0, dl, MVT::i32),
8691  DAG.getTargetConstant(Hi, dl, MVT::i32));
8692 
8693  if (Lo)
8694  SplatNode =
8695  DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8696  DAG.getTargetConstant(1, dl, MVT::i32),
8697  DAG.getTargetConstant(Lo, dl, MVT::i32));
8698 
8699  return DAG.getBitcast(Op.getValueType(), SplatNode);
8700  }
8701  }
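// Editor's note (worked example, not part of the original source): a v2f64
// splat of the bits 0x3FF0000000000001 (1.0 plus one ulp) loses information
// when shrunk to single precision, so it takes the XXSPLTI32DX path above:
// Hi = 0x3FF00000 is splatted with index 0 and Lo = 0x00000001 with index 1,
// using two XXSPLTI32DX instructions.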
8702 
8703  if (!BVNIsConstantSplat || SplatBitSize > 32) {
8704 
8705  bool IsPermutedLoad = false;
8706  const SDValue *InputLoad =
8707  getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8708  // Handle load-and-splat patterns as we have instructions that will do this
8709  // in one go.
8710  if (InputLoad && DAG.isSplatValue(Op, true)) {
8711  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8712 
8713  // We have handling for 4 and 8 byte elements.
8714  unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8715 
8716  // To check for a single use of this load, we have to check for vector
8717  // width (128 bits) / ElementSize uses (since each operand of the
8718  // BUILD_VECTOR is a separate use of the value).
8719  unsigned NumUsesOfInputLD = 128 / ElementSize;
8720  for (SDValue BVInOp : Op->ops())
8721  if (BVInOp.isUndef())
8722  NumUsesOfInputLD--;
8723  assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
8724  if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
8725  ((Subtarget.hasVSX() && ElementSize == 64) ||
8726  (Subtarget.hasP9Vector() && ElementSize == 32))) {
8727  SDValue Ops[] = {
8728  LD->getChain(), // Chain
8729  LD->getBasePtr(), // Ptr
8730  DAG.getValueType(Op.getValueType()) // VT
8731  };
8732  SDValue LdSplt = DAG.getMemIntrinsicNode(
8733  PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8734  Ops, LD->getMemoryVT(), LD->getMemOperand());
8735  // Replace all uses of the output chain of the original load with the
8736  // output chain of the new load.
8737  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
8738  LdSplt.getValue(1));
8739  return LdSplt;
8740  }
8741  }
8742 
8743  // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
8744  // 32-bits can be lowered to VSX instructions under certain conditions.
8745  // Without VSX, there is no pattern more efficient than expanding the node.
8746  if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
8747  haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8748  Subtarget.hasP8Vector()))
8749  return Op;
8750  return SDValue();
8751  }
8752 
8753  uint64_t SplatBits = APSplatBits.getZExtValue();
8754  uint64_t SplatUndef = APSplatUndef.getZExtValue();
8755  unsigned SplatSize = SplatBitSize / 8;
8756 
8757  // First, handle single instruction cases.
8758 
8759  // All zeros?
8760  if (SplatBits == 0) {
8761  // Canonicalize all zero vectors to be v4i32.
8762  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8763  SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8764  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8765  }
8766  return Op;
8767  }
8768 
8769  // We have XXSPLTIW for constant splats four bytes wide.
8770  // Since the vector length is a multiple of 4, 2-byte splats can be replaced
8771  // with 4-byte splats. We replicate the SplatBits in case of a 2-byte splat to
8772  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
8773  // turned into a 4-byte splat of 0xABABABAB.
8774  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
8775  return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
8776  Op.getValueType(), DAG, dl);
8777 
8778  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
8779  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8780  dl);
8781 
8782  // We have XXSPLTIB for constant splats one byte wide.
8783  if (Subtarget.hasP9Vector() && SplatSize == 1)
8784  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8785  dl);
8786 
8787  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8788  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8789  (32-SplatBitSize));
8790  if (SextVal >= -16 && SextVal <= 15)
8791  return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
8792  dl);
8793 
8794  // Two instruction sequences.
8795 
8796  // If this value is in the range [-32,30] and is even, use:
8797  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8798  // If this value is in the range [17,31] and is odd, use:
8799  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8800  // If this value is in the range [-31,-17] and is odd, use:
8801  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8802  // Note the last two are three-instruction sequences.
8803  if (SextVal >= -32 && SextVal <= 31) {
8804  // To avoid having these optimizations undone by constant folding,
8805  // we convert to a pseudo that will be expanded later into one of
8806  // the above forms.
8807  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8808  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8809  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8810  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8811  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8812  if (VT == Op.getValueType())
8813  return RetVal;
8814  else
8815  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8816  }
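// Editor's note (worked examples, not part of the original source):
// SextVal = 30 expands to vspltisw(15) + vspltisw(15); SextVal = 27 to
// vspltisw(11) - vspltisw(-16), since 11 - (-16) = 27; and SextVal = -27 to
// vspltisw(-11) + vspltisw(-16).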
8817 
8818  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8819  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8820  // for fneg/fabs.
8821  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8822  // Make a splat of -1 with vspltisw -1:
8823  SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
8824 
8825  // Make the VSLW intrinsic, computing 0x8000_0000.
8826  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8827  OnesV, DAG, dl);
8828 
8829  // xor by OnesV to invert it.
8830  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8831  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8832  }
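// Editor's note (worked through, not part of the original source): vspltisw
// -1 makes each word 0xFFFFFFFF; vslw uses only the low 5 bits of the shift
// amount, so each word is shifted left by 31 to give 0x8000_0000; XOR with
// 0xFFFFFFFF then produces 0x7FFF_FFFF in every lane.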
8833 
8834  // Check to see if this is a wide variety of vsplti*, binop self cases.
8835  static const signed char SplatCsts[] = {
8836  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8837  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8838  };
8839 
8840  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8841  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8842  // cases which are ambiguous (e.g. formation of 0x8000_0000).
8843  int i = SplatCsts[idx];
8844 
8845  // Figure out what shift amount will be used by altivec if shifted by i in
8846  // this splat size.
8847  unsigned TypeShiftAmt = i & (SplatBitSize-1);
8848 
8849  // vsplti + shl self.
8850  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8851  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8852  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8853  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8854  Intrinsic::ppc_altivec_vslw
8855  };
8856  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8857  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8858  }
8859 
8860  // vsplti + srl self.
8861  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8862  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8863  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8864  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8865  Intrinsic::ppc_altivec_vsrw
8866  };
8867  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8868  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8869  }
8870 
8871  // vsplti + rol self.
8872  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8873  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8874  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8875  static const unsigned IIDs[] = { // Intrinsic to use for each size.
8876  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8877  Intrinsic::ppc_altivec_vrlw
8878  };
8879  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8880  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8881  }
8882 
8883  // t = vsplti c, result = vsldoi t, t, 1
8884  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8885  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8886  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8887  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8888  }
8889  // t = vsplti c, result = vsldoi t, t, 2
8890  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8891  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8892  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8893  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8894  }
8895  // t = vsplti c, result = vsldoi t, t, 3
8896  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8897  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8898  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8899  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8900  }
8901  }
8902 
8903  return SDValue();
8904 }
8905 
8906 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8907 /// the specified operations to build the shuffle.
8908 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8909  SDValue RHS, SelectionDAG &DAG,
8910  const SDLoc &dl) {
8911  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8912  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8913  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8914 
8915  enum {
8916  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8917  OP_VMRGHW,
8918  OP_VMRGLW,
8919  OP_VSPLTISW0,
8920  OP_VSPLTISW1,
8921  OP_VSPLTISW2,
8922  OP_VSPLTISW3,
8923  OP_VSLDOI4,
8924  OP_VSLDOI8,
8925  OP_VSLDOI12
8926  };
8927 
8928  if (OpNum == OP_COPY) {
8929  if (LHSID == (1*9+2)*9+3) return LHS;
8930  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8931  return RHS;
8932  }
8933 
8934  SDValue OpLHS, OpRHS;
8935  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8936  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8937 
8938  int ShufIdxs[16];
8939  switch (OpNum) {
8940  default: llvm_unreachable("Unknown i32 permute!");
8941  case OP_VMRGHW:
8942  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
8943  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8944  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
8945  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8946  break;
8947  case OP_VMRGLW:
8948  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8949  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8950  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8951  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8952  break;
8953  case OP_VSPLTISW0:
8954  for (unsigned i = 0; i != 16; ++i)
8955  ShufIdxs[i] = (i&3)+0;
8956  break;
8957  case OP_VSPLTISW1:
8958  for (unsigned i = 0; i != 16; ++i)
8959  ShufIdxs[i] = (i&3)+4;
8960  break;
8961  case OP_VSPLTISW2:
8962  for (unsigned i = 0; i != 16; ++i)
8963  ShufIdxs[i] = (i&3)+8;
8964  break;
8965  case OP_VSPLTISW3:
8966  for (unsigned i = 0; i != 16; ++i)
8967  ShufIdxs[i] = (i&3)+12;
8968  break;
8969  case OP_VSLDOI4:
8970  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8971  case OP_VSLDOI8:
8972  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8973  case OP_VSLDOI12:
8974  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8975  }
8976  EVT VT = OpLHS.getValueType();
8977  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8978  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8979  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8980  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8981 }
8982 
8983 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8984 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8985 /// SDValue.
8986 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8987  SelectionDAG &DAG) const {
8988  const unsigned BytesInVector = 16;
8989  bool IsLE = Subtarget.isLittleEndian();
8990  SDLoc dl(N);
8991  SDValue V1 = N->getOperand(0);
8992  SDValue V2 = N->getOperand(1);
8993  unsigned ShiftElts = 0, InsertAtByte = 0;
8994  bool Swap = false;
8995 
8996  // Shifts required to get the byte we want at element 7.
8997  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
8998  0, 15, 14, 13, 12, 11, 10, 9};
8999  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9000  1, 2, 3, 4, 5, 6, 7, 8};
9001 
9002  ArrayRef<int> Mask = N->getMask();
9003  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9004 
9005  // For each mask element, find out if we're just inserting something
9006  // from V2 into V1 or vice versa.
9007  // Possible permutations inserting an element from V2 into V1:
9008  // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9009  // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9010  // ...
9011  // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9012  // Inserting from V1 into V2 will be similar, except mask range will be
9013  // [16,31].
9014 
9015  bool FoundCandidate = false;
9016  // If both vector operands for the shuffle are the same vector, the mask
9017  // will contain only elements from the first one and the second one will be
9018  // undef.
9019  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9020  // Go through the mask of bytes to find an element that's being moved
9021  // from one vector to the other.
9022  for (unsigned i = 0; i < BytesInVector; ++i) {
9023  unsigned CurrentElement = Mask[i];
9024  // If 2nd operand is undefined, we should only look for element 7 in the
9025  // Mask.
9026  if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9027  continue;
9028 
9029  bool OtherElementsInOrder = true;
9030  // Examine the other elements in the Mask to see if they're in original
9031  // order.
9032  for (unsigned j = 0; j < BytesInVector; ++j) {
9033  if (j == i)
9034  continue;
9035  // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9036  // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9037  // in which case we always assume we're picking from the 1st operand.
9038  int MaskOffset =
9039  (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9040  if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9041  OtherElementsInOrder = false;
9042  break;
9043  }
9044  }
9045  // If other elements are in original order, we record the number of shifts
9046  // we need to get the element we want into element 7. Also record which byte
9047  // in the vector we should insert into.
9048  if (OtherElementsInOrder) {
9049  // If 2nd operand is undefined, we assume no shifts and no swapping.
9050  if (V2.isUndef()) {
9051  ShiftElts = 0;
9052  Swap = false;
9053  } else {
9054  // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9055  ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9056  : BigEndianShifts[CurrentElement & 0xF];
9057  Swap = CurrentElement < BytesInVector;
9058  }
9059  InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9060  FoundCandidate = true;
9061  break;
9062  }
9063  }
9064 
9065  if (!FoundCandidate)
9066  return SDValue();
9067 
9068  // Candidate found, construct the proper SDAG sequence with VINSERTB,
9069  // optionally with VECSHL if shift is required.
9070  if (Swap)
9071  std::swap(V1, V2);
9072  if (V2.isUndef())
9073  V2 = V1;
9074  if (ShiftElts) {
9075  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9076  DAG.getConstant(ShiftElts, dl, MVT::i32));
9077  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9078  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9079  }
9080  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9081  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9082 }
9083 
9084 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9085 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9086 /// SDValue.
9087 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9088  SelectionDAG &DAG) const {
9089  const unsigned NumHalfWords = 8;
9090  const unsigned BytesInVector = NumHalfWords * 2;
9091  // Check that the shuffle is on half-words.
9092  if (!isNByteElemShuffleMask(N, 2, 1))
9093  return SDValue();
9094 
9095  bool IsLE = Subtarget.isLittleEndian();
9096  SDLoc dl(N);
9097  SDValue V1 = N->getOperand(0);
9098  SDValue V2 = N->getOperand(1);
9099  unsigned ShiftElts = 0, InsertAtByte = 0;
9100  bool Swap = false;
9101 
9102  // Shifts required to get the half-word we want at element 3.
9103  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9104  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9105 
9106  uint32_t Mask = 0;
9107  uint32_t OriginalOrderLow = 0x1234567;
9108  uint32_t OriginalOrderHigh = 0x89ABCDEF;
9109  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9110  // 32-bit space, only need 4-bit nibbles per element.
9111  for (unsigned i = 0; i < NumHalfWords; ++i) {
9112  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9113  Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9114  }
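// Editor's note (worked example, not part of the original source): for the
// identity half-word mask <0,1,2,3,4,5,6,7> the loop above packs
// getMaskElt(i * 2) / 2 into nibbles from the top, producing
// Mask = 0x01234567, which is exactly OriginalOrderLow.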
9115 
9116  // For each mask element, find out if we're just inserting something
9117  // from V2 into V1 or vice versa. Possible permutations inserting an element
9118  // from V2 into V1:
9119  // X, 1, 2, 3, 4, 5, 6, 7
9120  // 0, X, 2, 3, 4, 5, 6, 7
9121  // 0, 1, X, 3, 4, 5, 6, 7
9122  // 0, 1, 2, X, 4, 5, 6, 7
9123  // 0, 1, 2, 3, X, 5, 6, 7
9124  // 0, 1, 2, 3, 4, X, 6, 7
9125  // 0, 1, 2, 3, 4, 5, X, 7
9126  // 0, 1, 2, 3, 4, 5, 6, X
9127  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9128 
9129  bool FoundCandidate = false;
9130  // Go through the mask of half-words to find an element that's being moved
9131  // from one vector to the other.
9132  for (unsigned i = 0; i < NumHalfWords; ++i) {
9133  unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9134  uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9135  uint32_t MaskOtherElts = ~(0xF << MaskShift);
9136  uint32_t TargetOrder = 0x0;
9137 
9138  // If both vector operands for the shuffle are the same vector, the mask
9139  // will contain only elements from the first one and the second one will be
9140  // undef.
9141  if (V2.isUndef()) {
9142  ShiftElts = 0;
9143  unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9144  TargetOrder = OriginalOrderLow;
9145  Swap = false;
9146  // Skip if not the correct element or mask of other elements don't equal
9147  // to our expected order.
9148  if (MaskOneElt == VINSERTHSrcElem &&
9149  (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9150  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9151  FoundCandidate = true;
9152  break;
9153  }
9154  } else { // If both operands are defined.
9155  // Target order is [8,15] if the current mask is between [0,7].
9156  TargetOrder =
9157  (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9158  // Skip if mask of other elements don't equal our expected order.
9159  if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9160  // We only need the last 3 bits for the number of shifts.
9161  ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9162  : BigEndianShifts[MaskOneElt & 0x7];
9163  InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9164  Swap = MaskOneElt < NumHalfWords;
9165  FoundCandidate = true;
9166  break;
9167  }
9168  }
9169  }
9170 
9171  if (!FoundCandidate)
9172  return SDValue();
9173 
9174  // Candidate found, construct the proper SDAG sequence with VINSERTH,
9175  // optionally with VECSHL if shift is required.
9176  if (Swap)
9177  std::swap(V1, V2);
9178  if (V2.isUndef())
9179  V2 = V1;
9180  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9181  if (ShiftElts) {
9182  // Double ShiftElts because we're left shifting on v16i8 type.
9183  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9184  DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9185  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9186  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9187  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9188  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9189  }
9190  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9191  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9192  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9193  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9194 }
9195 
9196 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9197 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9198 /// return the default SDValue.
9199 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9200  SelectionDAG &DAG) const {
9201  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9202  // to v16i8. Peek through the bitcasts to get the actual operands.
9203  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9204  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9205 
9206  auto ShuffleMask = SVN->getMask();
9207  SDValue VecShuffle(SVN, 0);
9208  SDLoc DL(SVN);
9209 
9210  // Check that we have a four byte shuffle.
9211  if (!isNByteElemShuffleMask(SVN, 4, 1))
9212  return SDValue();
9213 
9214  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9215  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9216  std::swap(LHS, RHS);
9217  VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9218  ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9219  }
9220 
9221  // Ensure that the RHS is a vector of constants.
9222  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9223  if (!BVN)
9224  return SDValue();
9225 
9226  // Check if RHS is a splat of 4-bytes (or smaller).
9227  APInt APSplatValue, APSplatUndef;
9228  unsigned SplatBitSize;
9229  bool HasAnyUndefs;
9230  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9231  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9232  SplatBitSize > 32)
9233  return SDValue();
9234 
9235  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9236  // The instruction splats a constant C into two words of the source vector
9237  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9238  // Thus we check that the shuffle mask is the equivalent of
9239  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9240  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9241  // within each word are consecutive, so we only need to check the first byte.
9242  SDValue Index;
9243  bool IsLE = Subtarget.isLittleEndian();
9244  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9245  (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9246  ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9247  Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9248  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9249  (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9250  ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9251  Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9252  else
9253  return SDValue();
9254 
9255  // If the splat is narrower than 32-bits, we need to get the 32-bit value
9256  // for XXSPLTI32DX.
9257  unsigned SplatVal = APSplatValue.getZExtValue();
9258  for (; SplatBitSize < 32; SplatBitSize <<= 1)
9259  SplatVal |= (SplatVal << SplatBitSize);
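// Editor's note (worked example, not part of the original source): an 8-bit
// splat value of 0xAB widens as 0xAB -> 0xABAB -> 0xABABABAB across the two
// iterations of the loop above.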
9260 
9261  SDValue SplatNode = DAG.getNode(
9262  PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9263  Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9264  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9265 }
9266 
9267 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9268 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9269 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9270 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9271 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9272  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9273  assert(Op.getValueType() == MVT::v1i128 &&
9274  "Only set v1i128 as custom, other type shouldn't reach here!");
9275  SDLoc dl(Op);
9276  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9277  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9278  unsigned SHLAmt = N1.getConstantOperandVal(0);
9279  if (SHLAmt % 8 == 0) {
9280  SmallVector<int, 16> Mask(16, 0);
9281  std::iota(Mask.begin(), Mask.end(), 0);
9282  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9283  if (SDValue Shuffle =
9284  DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9285  DAG.getUNDEF(MVT::v16i8), Mask))
9286  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9287  }
9288  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9289  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9290  DAG.getConstant(SHLAmt, dl, MVT::i32));
9291  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9292  DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9293  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9294  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9295 }
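// Editor's note (worked example, not part of the original source): for
// SHLAmt = 16 the mask above becomes <2,3,...,15,0,1>, so the rotate is done
// as a single two-byte shuffle; for SHLAmt = 20 the scalar path instead
// emits (x << 20) | (x >> 108) on i128.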
9296 
9297 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9298 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
9299 /// return the code it can be lowered into. Worst case, it can always be
9300 /// lowered into a vperm.
9301 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9302  SelectionDAG &DAG) const {
9303  SDLoc dl(Op);
9304  SDValue V1 = Op.getOperand(0);
9305  SDValue V2 = Op.getOperand(1);
9306  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9307 
9308  // Any nodes that were combined in the target-independent combiner prior
9309  // to vector legalization will not be sent to the target combine. Try to
9310  // combine it here.
9311  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9312  if (!isa<ShuffleVectorSDNode>(NewShuffle))
9313  return NewShuffle;
9314  Op = NewShuffle;
9315  SVOp = cast<ShuffleVectorSDNode>(Op);
9316  V1 = Op.getOperand(0);
9317  V2 = Op.getOperand(1);
9318  }
9319  EVT VT = Op.getValueType();
9320  bool isLittleEndian = Subtarget.isLittleEndian();
9321 
9322  unsigned ShiftElts, InsertAtByte;
9323  bool Swap = false;
9324 
9325  // If this is a load-and-splat, we can do that with a single instruction
9326  // in some cases. However if the load has multiple uses, we don't want to
9327  // combine it because that will just produce multiple loads.
9328  bool IsPermutedLoad = false;
9329  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9330  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9331  (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9332  InputLoad->hasOneUse()) {
9333  bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9334  int SplatIdx =
9335  PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9336 
9337  // The splat index for permuted loads will be in the left half of the vector
9338  // which is strictly wider than the loaded value by 8 bytes. So we need to
9339  // adjust the splat index to point to the correct address in memory.
9340  if (IsPermutedLoad) {
9341  assert(isLittleEndian && "Unexpected permuted load on big endian target");
9342  SplatIdx += IsFourByte ? 2 : 1;
9343  assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9344  "Splat of a value outside of the loaded memory");
9345  }
9346 
9347  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9348  // For 4-byte load-and-splat, we need Power9.
9349  if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9350  uint64_t Offset = 0;
9351  if (IsFourByte)
9352  Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9353  else
9354  Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9355 
9356  SDValue BasePtr = LD->getBasePtr();
9357  if (Offset != 0)
9358  BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9359  BasePtr, DAG.getIntPtrConstant(Offset, dl));
9360  SDValue Ops[] = {
9361  LD->getChain(), // Chain
9362  BasePtr, // BasePtr
9363  DAG.getValueType(Op.getValueType()) // VT
9364  };
9365  SDVTList VTL =
9366  DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9367  SDValue LdSplt =
9368  DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9369  Ops, LD->getMemoryVT(), LD->getMemOperand());
9370  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9371  if (LdSplt.getValueType() != SVOp->getValueType(0))
9372  LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9373  return LdSplt;
9374  }
9375  }
9376  if (Subtarget.hasP9Vector() &&
9377  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9378  isLittleEndian)) {
9379  if (Swap)
9380  std::swap(V1, V2);
9381  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9382  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9383  if (ShiftElts) {
9384  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9385  DAG.getConstant(ShiftElts, dl, MVT::i32));
9386  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9387  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9388  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9389  }
9390  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9391  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9392  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9393  }
9394 
9395  if (Subtarget.hasPrefixInstrs()) {
9396  SDValue SplatInsertNode;
9397  if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9398  return SplatInsertNode;
9399  }
9400 
9401  if (Subtarget.hasP9Altivec()) {
9402  SDValue NewISDNode;
9403  if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9404  return NewISDNode;
9405 
9406  if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9407  return NewISDNode;
9408  }
9409 
9410  if (Subtarget.hasVSX() &&
9411  PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9412  if (Swap)
9413  std::swap(V1, V2);
9414  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9415  SDValue Conv2 =
9416  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9417 
9418  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9419  DAG.getConstant(ShiftElts, dl, MVT::i32));
9420  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9421  }
9422 
9423  if (Subtarget.hasVSX() &&
9424  PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9425  if (Swap)
9426  std::swap(V1, V2);
9427  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9428  SDValue Conv2 =
9429  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9430 
9431  SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9432  DAG.getConstant(ShiftElts, dl, MVT::i32));
9433  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9434  }
9435 
9436  if (Subtarget.hasP9Vector()) {
9437  if (PPC::isXXBRHShuffleMask(SVOp)) {
9438  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9439  SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9440  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9441  } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9442  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9443  SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9444  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9445  } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9446  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9447  SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9448  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9449  } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9450  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9451  SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9452  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9453  }
9454  }
9455 
9456  if (Subtarget.hasVSX()) {
9457  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9458  int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9459 
9460  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9461  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9462  DAG.getConstant(SplatIdx, dl, MVT::i32));
9463  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9464  }
9465 
9466  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9467  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9468  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9469  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9470  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9471  }
9472  }
9473 
9474  // Cases that are handled by instructions that take permute immediates
9475  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9476  // selected by the instruction selector.
9477  if (V2.isUndef()) {
9478  if (PPC::isSplatShuffleMask(SVOp, 1) ||
9479  PPC::isSplatShuffleMask(SVOp, 2) ||
9480  PPC::isSplatShuffleMask(SVOp, 4) ||
9481  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9482  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9483  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9484  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9485  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9486  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9487  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9488  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9489  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9490  (Subtarget.hasP8Altivec() && (
9491  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9492  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9493  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9494  return Op;
9495  }
9496  }
9497 
9498  // Altivec has a variety of "shuffle immediates" that take two vector inputs
9499  // and produce a fixed permutation. If any of these match, do not lower to
9500  // VPERM.
9501  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9502  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9503  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9504  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9505  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9506  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9507  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9508  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9509  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9510  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9511  (Subtarget.hasP8Altivec() && (
9512  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9513  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9514  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9515  return Op;
9516 
9517  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
9518  // perfect shuffle table to emit an optimal matching sequence.
9519  ArrayRef<int> PermMask = SVOp->getMask();
9520 
9521  unsigned PFIndexes[4];
9522  bool isFourElementShuffle = true;
9523  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9524  unsigned EltNo = 8; // Start out undef.
9525  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
9526  if (PermMask[i*4+j] < 0)
9527  continue; // Undef, ignore it.
9528 
9529  unsigned ByteSource = PermMask[i*4+j];
9530  if ((ByteSource & 3) != j) {
9531  isFourElementShuffle = false;
9532  break;
9533  }
9534 
9535  if (EltNo == 8) {
9536  EltNo = ByteSource/4;
9537  } else if (EltNo != ByteSource/4) {
9538  isFourElementShuffle = false;
9539  break;
9540  }
9541  }
9542  PFIndexes[i] = EltNo;
9543  }
9544 
9545  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9546  // perfect shuffle vector to determine if it is cost effective to do this as
9547  // discrete instructions, or whether we should use a vperm.
9548  // For now, we skip this for little endian until such time as we have a
9549  // little-endian perfect shuffle table.
9550  if (isFourElementShuffle && !isLittleEndian) {
9551  // Compute the index in the perfect shuffle table.
9552  unsigned PFTableIndex =
9553  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9554 
9555  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9556  unsigned Cost = (PFEntry >> 30);
9557 
9558  // Determining when to avoid vperm is tricky. Many things affect the cost
9559  // of vperm, particularly how many times the perm mask needs to be computed.
9560  // For example, if the perm mask can be hoisted out of a loop or is already
9561  // used (perhaps because there are multiple permutes with the same shuffle
9562  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9563  // the loop requires an extra register.
9564  //
9565  // As a compromise, we only emit discrete instructions if the shuffle can be
9566  // generated in 3 or fewer operations. When we have loop information
9567  // available, if this block is within a loop, we should avoid using vperm
9568  // for 3-operation perms and use a constant pool load instead.
9569  if (Cost < 3)
9570  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9571  }
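  // Worked example (illustrative, not from the source): the 4-element
  // identity mask <0,1,2,3> gives PFIndexes = {0,1,2,3} (8 would mean an
  // undef element), so PFTableIndex = 0*729 + 1*81 + 2*9 + 3 = 102 in the
  // base-9 encoding above, and the entry's top two bits give the Cost that
  // is compared against the 3-operation cutoff.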
9572 
9573  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9574  // vector that will get spilled to the constant pool.
9575  if (V2.isUndef()) V2 = V1;
9576 
9577  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9578  // that it is in input element units, not in bytes. Convert now.
9579 
9580  // For little endian, the order of the input vectors is reversed, and
9581  // the permutation mask is complemented with respect to 31. This is
9582  // necessary to produce proper semantics with the big-endian-biased vperm
9583  // instruction.
9584  EVT EltVT = V1.getValueType().getVectorElementType();
9585  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9586 
9587  SmallVector<SDValue, 16> ResultMask;
9588  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9589  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9590 
9591  for (unsigned j = 0; j != BytesPerElement; ++j)
9592  if (isLittleEndian)
9593  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9594  dl, MVT::i32));
9595  else
9596  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9597  MVT::i32));
9598  }
9599 
9600  ShufflesHandledWithVPERM++;
9601  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9602  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9603  LLVM_DEBUG(SVOp->dump());
9604  LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9605  LLVM_DEBUG(VPermMask.dump());
9606 
9607  if (isLittleEndian)
9608  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9609  V2, V1, VPermMask);
9610  else
9611  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9612  V1, V2, VPermMask);
9613 }
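// A minimal sketch of the control-vector math above (the helper name is
// hypothetical, not part of this file): big-endian entries are plain source
// byte numbers, while little endian swaps the operands and complements each
// byte index against 31 to match the big-endian-biased vperm.
static uint8_t vpermControlByte(unsigned SrcElt, unsigned BytesPerElement,
                                unsigned ByteInElt, bool IsLittleEndian) {
  unsigned Idx = SrcElt * BytesPerElement + ByteInElt; // byte # within V1|V2
  return IsLittleEndian ? 31 - Idx : Idx;              // mirror for LE
}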
9614 
9615 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9616 /// vector comparison. If it is, return true and fill in Opc/isDot with
9617 /// information about the intrinsic.
9618 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9619  bool &isDot, const PPCSubtarget &Subtarget) {
9620  unsigned IntrinsicID =
9621  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9622  CompareOpc = -1;
9623  isDot = false;
9624  switch (IntrinsicID) {
9625  default:
9626  return false;
9627  // Comparison predicates.
9628  case Intrinsic::ppc_altivec_vcmpbfp_p:
9629  CompareOpc = 966;
9630  isDot = true;
9631  break;
9632  case Intrinsic::ppc_altivec_vcmpeqfp_p:
9633  CompareOpc = 198;
9634  isDot = true;
9635  break;
9636  case Intrinsic::ppc_altivec_vcmpequb_p:
9637  CompareOpc = 6;
9638  isDot = true;
9639  break;
9640  case Intrinsic::ppc_altivec_vcmpequh_p:
9641  CompareOpc = 70;
9642  isDot = true;
9643  break;
9644  case Intrinsic::ppc_altivec_vcmpequw_p:
9645  CompareOpc = 134;
9646  isDot = true;
9647  break;
9648  case Intrinsic::ppc_altivec_vcmpequd_p:
9649  if (Subtarget.hasP8Altivec()) {
9650  CompareOpc = 199;
9651  isDot = true;
9652  } else
9653  return false;
9654  break;
9655  case Intrinsic::ppc_altivec_vcmpneb_p:
9656  case Intrinsic::ppc_altivec_vcmpneh_p:
9657  case Intrinsic::ppc_altivec_vcmpnew_p:
9658  case Intrinsic::ppc_altivec_vcmpnezb_p:
9659  case Intrinsic::ppc_altivec_vcmpnezh_p:
9660  case Intrinsic::ppc_altivec_vcmpnezw_p:
9661  if (Subtarget.hasP9Altivec()) {
9662  switch (IntrinsicID) {
9663  default:
9664  llvm_unreachable("Unknown comparison intrinsic.");
9665  case Intrinsic::ppc_altivec_vcmpneb_p:
9666  CompareOpc = 7;
9667  break;
9668  case Intrinsic::ppc_altivec_vcmpneh_p:
9669  CompareOpc = 71;
9670  break;
9671  case Intrinsic::ppc_altivec_vcmpnew_p:
9672  CompareOpc = 135;
9673  break;
9674  case Intrinsic::ppc_altivec_vcmpnezb_p:
9675  CompareOpc = 263;
9676  break;
9677  case Intrinsic::ppc_altivec_vcmpnezh_p:
9678  CompareOpc = 327;
9679  break;
9680  case Intrinsic::ppc_altivec_vcmpnezw_p:
9681  CompareOpc = 391;
9682  break;
9683  }
9684  isDot = true;
9685  } else
9686  return false;
9687  break;
9688  case Intrinsic::ppc_altivec_vcmpgefp_p:
9689  CompareOpc = 454;
9690  isDot = true;
9691  break;
9692  case Intrinsic::ppc_altivec_vcmpgtfp_p:
9693  CompareOpc = 710;
9694  isDot = true;
9695  break;
9696  case Intrinsic::ppc_altivec_vcmpgtsb_p:
9697  CompareOpc = 774;
9698  isDot = true;
9699  break;
9700  case Intrinsic::ppc_altivec_vcmpgtsh_p:
9701  CompareOpc = 838;
9702  isDot = true;
9703  break;
9704  case Intrinsic::ppc_altivec_vcmpgtsw_p:
9705  CompareOpc = 902;
9706  isDot = true;
9707  break;
9708  case Intrinsic::ppc_altivec_vcmpgtsd_p:
9709  if (Subtarget.hasP8Altivec()) {
9710  CompareOpc = 967;
9711  isDot = true;
9712  } else
9713  return false;
9714  break;
9715  case Intrinsic::ppc_altivec_vcmpgtub_p:
9716  CompareOpc = 518;
9717  isDot = true;
9718  break;
9719  case Intrinsic::ppc_altivec_vcmpgtuh_p:
9720  CompareOpc = 582;
9721  isDot = true;
9722  break;
9723  case Intrinsic::ppc_altivec_vcmpgtuw_p:
9724  CompareOpc = 646;
9725  isDot = true;
9726  break;
9727  case Intrinsic::ppc_altivec_vcmpgtud_p:
9728  if (Subtarget.hasP8Altivec()) {
9729  CompareOpc = 711;
9730  isDot = true;
9731  } else
9732  return false;
9733  break;
9734 
9735  case Intrinsic::ppc_altivec_vcmpequq:
9736  case Intrinsic::ppc_altivec_vcmpgtsq:
9737  case Intrinsic::ppc_altivec_vcmpgtuq:
9738  if (!Subtarget.isISA3_1())
9739  return false;
9740  switch (IntrinsicID) {
9741  default:
9742  llvm_unreachable("Unknown comparison intrinsic.");
9743  case Intrinsic::ppc_altivec_vcmpequq:
9744  CompareOpc = 455;
9745  break;
9746  case Intrinsic::ppc_altivec_vcmpgtsq:
9747  CompareOpc = 903;
9748  break;
9749  case Intrinsic::ppc_altivec_vcmpgtuq:
9750  CompareOpc = 647;
9751  break;
9752  }
9753  break;
9754 
9755  // VSX predicate comparisons use the same infrastructure
9756  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9757  case Intrinsic::ppc_vsx_xvcmpgedp_p:
9758  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9759  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9760  case Intrinsic::ppc_vsx_xvcmpgesp_p:
9761  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9762  if (Subtarget.hasVSX()) {
9763  switch (IntrinsicID) {
9764  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9765  CompareOpc = 99;
9766  break;
9767  case Intrinsic::ppc_vsx_xvcmpgedp_p:
9768  CompareOpc = 115;
9769  break;
9770  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9771  CompareOpc = 107;
9772  break;
9773  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9774  CompareOpc = 67;
9775  break;
9776  case Intrinsic::ppc_vsx_xvcmpgesp_p:
9777  CompareOpc = 83;
9778  break;
9779  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9780  CompareOpc = 75;
9781  break;
9782  }
9783  isDot = true;
9784  } else
9785  return false;
9786  break;
9787 
9788  // Normal Comparisons.
9789  case Intrinsic::ppc_altivec_vcmpbfp:
9790  CompareOpc = 966;
9791  break;
9792  case Intrinsic::ppc_altivec_vcmpeqfp:
9793  CompareOpc = 198;
9794  break;
9795  case Intrinsic::ppc_altivec_vcmpequb:
9796  CompareOpc = 6;
9797  break;
9798  case Intrinsic::ppc_altivec_vcmpequh:
9799  CompareOpc = 70;
9800  break;
9801  case Intrinsic::ppc_altivec_vcmpequw:
9802  CompareOpc = 134;
9803  break;
9804  case Intrinsic::ppc_altivec_vcmpequd:
9805  if (Subtarget.hasP8Altivec())
9806  CompareOpc = 199;
9807  else
9808  return false;
9809  break;
9810  case Intrinsic::ppc_altivec_vcmpneb:
9811  case Intrinsic::ppc_altivec_vcmpneh:
9812  case Intrinsic::ppc_altivec_vcmpnew:
9813  case Intrinsic::ppc_altivec_vcmpnezb:
9814  case Intrinsic::ppc_altivec_vcmpnezh:
9815  case Intrinsic::ppc_altivec_vcmpnezw:
9816  if (Subtarget.hasP9Altivec())
9817  switch (IntrinsicID) {
9818  default:
9819  llvm_unreachable("Unknown comparison intrinsic.");
9820  case Intrinsic::ppc_altivec_vcmpneb:
9821  CompareOpc = 7;
9822  break;
9823  case Intrinsic::ppc_altivec_vcmpneh:
9824  CompareOpc = 71;
9825  break;
9826  case Intrinsic::ppc_altivec_vcmpnew:
9827  CompareOpc = 135;
9828  break;
9829  case Intrinsic::ppc_altivec_vcmpnezb:
9830  CompareOpc = 263;
9831  break;
9832  case Intrinsic::ppc_altivec_vcmpnezh:
9833  CompareOpc = 327;
9834  break;
9835  case Intrinsic::ppc_altivec_vcmpnezw:
9836  CompareOpc = 391;
9837  break;
9838  }
9839  else
9840  return false;
9841  break;
9842  case Intrinsic::ppc_altivec_vcmpgefp:
9843  CompareOpc = 454;
9844  break;
9845  case Intrinsic::ppc_altivec_vcmpgtfp:
9846  CompareOpc = 710;
9847  break;
9848  case Intrinsic::ppc_altivec_vcmpgtsb:
9849  CompareOpc = 774;
9850  break;
9851  case Intrinsic::ppc_altivec_vcmpgtsh:
9852  CompareOpc = 838;
9853  break;
9854  case Intrinsic::ppc_altivec_vcmpgtsw:
9855  CompareOpc = 902;
9856  break;
9857  case Intrinsic::ppc_altivec_vcmpgtsd:
9858  if (Subtarget.hasP8Altivec())
9859  CompareOpc = 967;
9860  else
9861  return false;
9862  break;
9863  case Intrinsic::ppc_altivec_vcmpgtub:
9864  CompareOpc = 518;
9865  break;
9866  case Intrinsic::ppc_altivec_vcmpgtuh:
9867  CompareOpc = 582;
9868  break;
9869  case Intrinsic::ppc_altivec_vcmpgtuw:
9870  CompareOpc = 646;
9871  break;
9872  case Intrinsic::ppc_altivec_vcmpgtud:
9873  if (Subtarget.hasP8Altivec())
9874  CompareOpc = 711;
9875  else
9876  return false;
9877  break;
9878  case Intrinsic::ppc_altivec_vcmpequq_p:
9879  case Intrinsic::ppc_altivec_vcmpgtsq_p:
9880  case Intrinsic::ppc_altivec_vcmpgtuq_p:
9881  if (!Subtarget.isISA3_1())
9882  return false;
9883  switch (IntrinsicID) {
9884  default:
9885  llvm_unreachable("Unknown comparison intrinsic.");
9886  case Intrinsic::ppc_altivec_vcmpequq_p:
9887  CompareOpc = 455;
9888  break;
9889  case Intrinsic::ppc_altivec_vcmpgtsq_p:
9890  CompareOpc = 903;
9891  break;
9892  case Intrinsic::ppc_altivec_vcmpgtuq_p:
9893  CompareOpc = 647;
9894  break;
9895  }
9896  isDot = true;
9897  break;
9898  }
9899  return true;
9900 }
9901 
9902 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9903 /// lower, do it, otherwise return null.
9904 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9905  SelectionDAG &DAG) const {
9906  unsigned IntrinsicID =
9907  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9908 
9909  SDLoc dl(Op);
9910 
9911  switch (IntrinsicID) {
9912  case Intrinsic::thread_pointer:
9913  // Reads the thread pointer register, used for __builtin_thread_pointer.
9914  if (Subtarget.isPPC64())
9915  return DAG.getRegister(PPC::X13, MVT::i64);
9916  return DAG.getRegister(PPC::R2, MVT::i32);
9917 
9918  case Intrinsic::ppc_mma_disassemble_acc:
9919  case Intrinsic::ppc_vsx_disassemble_pair: {
9920  int NumVecs = 2;
9921  SDValue WideVec = Op.getOperand(1);
9922  if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
9923  NumVecs = 4;
9924  WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
9925  }
9926  SmallVector<SDValue, 4> RetOps;
9927  for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
9928  SDValue Extract = DAG.getNode(
9929  PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
9930  DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
9931  : VecNo,
9932  dl, MVT::i64));
9933  RetOps.push_back(Extract);
9934  }
9935  return DAG.getMergeValues(RetOps, dl);
9936  }
9937  }
9938 
9939  // If this is a lowered altivec predicate compare, CompareOpc is set to the
9940  // opcode number of the comparison.
9941  int CompareOpc;
9942  bool isDot;
9943  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9944  return SDValue(); // Don't custom lower most intrinsics.
9945 
9946  // If this is a non-dot comparison, make the VCMP node and we are done.
9947  if (!isDot) {
9948  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9949  Op.getOperand(1), Op.getOperand(2),
9950  DAG.getConstant(CompareOpc, dl, MVT::i32));
9951  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9952  }
9953 
9954  // Create the PPCISD altivec 'dot' comparison node.
9955  SDValue Ops[] = {
9956  Op.getOperand(2), // LHS
9957  Op.getOperand(3), // RHS
9958  DAG.getConstant(CompareOpc, dl, MVT::i32)
9959  };
9960  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9961  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
9962 
9963  // Now that we have the comparison, emit a copy from the CR to a GPR.
9964  // This is flagged to the above dot comparison.
9965  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9966  DAG.getRegister(PPC::CR6, MVT::i32),
9967  CompNode.getValue(1));
9968 
9969  // Unpack the result based on how the target uses it.
9970  unsigned BitNo; // Bit # of CR6.
9971  bool InvertBit; // Invert result?
9972  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9973  default: // Can't happen, don't crash on invalid number though.
9974  case 0: // Return the value of the EQ bit of CR6.
9975  BitNo = 0; InvertBit = false;
9976  break;
9977  case 1: // Return the inverted value of the EQ bit of CR6.
9978  BitNo = 0; InvertBit = true;
9979  break;
9980  case 2: // Return the value of the LT bit of CR6.
9981  BitNo = 2; InvertBit = false;
9982  break;
9983  case 3: // Return the inverted value of the LT bit of CR6.
9984  BitNo = 2; InvertBit = true;
9985  break;
9986  }
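  // Worked example (illustrative): MFOCRF leaves CR6 in bits 7..4 of the
  // GPR, LSB-numbered (LT=7, GT=6, EQ=5, SO=4). So predicate 0 (the EQ bit,
  // BitNo = 0) shifts right by 8 - (3 - 0) = 5, and predicate 2 (the LT bit,
  // BitNo = 2) by 8 - (3 - 2) = 7, before the AND below isolates the bit.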
9987 
9988  // Shift the bit into the low position.
9989  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9990  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9991  // Isolate the bit.
9992  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9993  DAG.getConstant(1, dl, MVT::i32));
9994 
9995  // If we are supposed to, toggle the bit.
9996  if (InvertBit)
9997  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9998  DAG.getConstant(1, dl, MVT::i32));
9999  return Flags;
10000 }
10001 
10002 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10003  SelectionDAG &DAG) const {
10004  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10005  // the beginning of the argument list.
10006  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10007  SDLoc DL(Op);
10008  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10009  case Intrinsic::ppc_cfence: {
10010  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10011  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10012  return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10013  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10014  Op.getOperand(ArgStart + 1)),
10015  Op.getOperand(0)),
10016  0);
10017  }
10018  default:
10019  break;
10020  }
10021  return SDValue();
10022 }
10023 
10024 // Lower scalar BSWAP64 to xxbrd.
10025 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10026  SDLoc dl(Op);
10027  // MTVSRDD
10028  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10029  Op.getOperand(0));
10030  // XXBRD
10031  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10032  // MFVSRD
10033  int VectorIndex = 0;
10034  if (Subtarget.isLittleEndian())
10035  VectorIndex = 1;
10036  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10037  DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10038  return Op;
10039 }
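// A scalar model of the MTVSRDD/XXBRD/MFVSRD sequence above (an illustrative
// sketch only; assumes a compiler providing __builtin_bswap64): splat the
// value into both doubleword lanes, byte-reverse each lane, then read one
// lane back, which nets out to a plain 64-bit byte swap.
static uint64_t bswap64Model(uint64_t X) {
  uint64_t Lanes[2] = {X, X};     // MTVSRDD: duplicate into v2i64
  for (uint64_t &L : Lanes)       // XXBRD: reverse bytes per lane
    L = __builtin_bswap64(L);
  return Lanes[0];                // MFVSRD: extract one lane
}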
10040 
10041 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10042 // compared to a value that is atomically loaded (atomic loads zero-extend).
10043 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10044  SelectionDAG &DAG) const {
10045  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10046  "Expecting an atomic compare-and-swap here.");
10047  SDLoc dl(Op);
10048  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10049  EVT MemVT = AtomicNode->getMemoryVT();
10050  if (MemVT.getSizeInBits() >= 32)
10051  return Op;
10052 
10053  SDValue CmpOp = Op.getOperand(2);
10054  // If this is already correctly zero-extended, leave it alone.
10055  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10056  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10057  return Op;
10058 
10059  // Clear the high bits of the compare operand.
10060  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10061  SDValue NewCmpOp =
10062  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10063  DAG.getConstant(MaskVal, dl, MVT::i32));
10064 
10065  // Replace the existing compare operand with the properly zero-extended one.
10066  SmallVector<SDValue, 4> Ops;
10067  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10068  Ops.push_back(AtomicNode->getOperand(i));
10069  Ops[2] = NewCmpOp;
10070  MachineMemOperand *MMO = AtomicNode->getMemOperand();
10071  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10072  auto NodeTy =
10073  (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10074  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10075 }
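// Minimal model of the zero-extension above (illustrative; the helper name
// is hypothetical): the i8/i16 compare value is masked so it can match what
// lbarx/lharx zero-extend from memory.
static uint32_t zextCmpOperand(uint32_t Cmp, unsigned MemBits) {
  uint32_t MaskVal = (1u << MemBits) - 1; // 0xFF for i8, 0xFFFF for i16
  return Cmp & MaskVal;                   // clear the high bits
}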
10076 
10077 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10078  SelectionDAG &DAG) const {
10079  SDLoc dl(Op);
10080  // Create a stack slot that is 16-byte aligned.
10081  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10082  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10083  EVT PtrVT = getPointerTy(DAG.getDataLayout());
10084  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10085 
10086  // Store the input value into Value#0 of the stack slot.
10087  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10088  MachinePointerInfo());
10089  // Load it out.
10090  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10091 }
10092 
10093 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10094  SelectionDAG &DAG) const {
10095  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10096  "Should only be called for ISD::INSERT_VECTOR_ELT");
10097 
10098  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10099  // We have legal lowering for constant indices but not for variable ones.
10100  if (!C)
10101  return SDValue();
10102 
10103  EVT VT = Op.getValueType();
10104  SDLoc dl(Op);
10105  SDValue V1 = Op.getOperand(0);
10106  SDValue V2 = Op.getOperand(1);
10107  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10108  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10109  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10110  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10111  unsigned InsertAtElement = C->getZExtValue();
10112  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10113  if (Subtarget.isLittleEndian()) {
10114  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10115  }
10116  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10117  DAG.getConstant(InsertAtByte, dl, MVT::i32));
10118  }
10119  return Op;
10120 }
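// Sketch of the byte-offset computation above (hypothetical helper):
// PPCISD::VECINSERT takes a byte immediate, so the element index is scaled
// by the element size and, on little endian, mirrored across the 16-byte
// register.
static unsigned vecinsertByteOffset(unsigned Elt, unsigned EltBytes,
                                    bool IsLittleEndian) {
  unsigned Byte = Elt * EltBytes;     // element index -> byte offset
  if (IsLittleEndian)
    Byte = (16 - EltBytes) - Byte;    // e.g. v8i16 element 3 -> byte 8 on LE
  return Byte;
}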
10121 
10122 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10123  SelectionDAG &DAG) const {
10124  SDLoc dl(Op);
10125  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10126  SDValue LoadChain = LN->getChain();
10127  SDValue BasePtr = LN->getBasePtr();
10128  EVT VT = Op.getValueType();
10129 
10130  if (VT != MVT::v256i1 && VT != MVT::v512i1)
10131  return Op;
10132 
10133  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10134  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10135  // 2 or 4 vsx registers.
10136  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10137  "Type unsupported without MMA");
10138  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10139  "Type unsupported without paired vector support");
10140  Align Alignment = LN->getAlign();
10141  SmallVector<SDValue, 4> Loads;
10142  SmallVector<SDValue, 4> LoadChains;
10143  unsigned NumVecs = VT.getSizeInBits() / 128;
10144  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10145  SDValue Load =
10146  DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10147  LN->getPointerInfo().getWithOffset(Idx * 16),
10148  commonAlignment(Alignment, Idx * 16),
10149  LN->getMemOperand()->getFlags(), LN->getAAInfo());
10150  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10151  DAG.getConstant(16, dl, BasePtr.getValueType()));
10152  Loads.push_back(Load);
10153  LoadChains.push_back(Load.getValue(1));
10154  }
10155  if (Subtarget.isLittleEndian()) {
10156  std::reverse(Loads.begin(), Loads.end());
10157  std::reverse(LoadChains.begin(), LoadChains.end());
10158  }
10159  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10160  SDValue Value =
10161  DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10162  dl, VT, Loads);
10163  SDValue RetOps[] = {Value, TF};
10164  return DAG.getMergeValues(RetOps, dl);
10165 }
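// Shape of the split above (an illustrative summary): a v256i1 pair becomes
// 16-byte loads at offsets 0 and 16, a v512i1 accumulator at 0, 16, 32 and
// 48; on little endian the loaded registers are reversed before being
// reassembled with PAIR_BUILD or ACC_BUILD.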
10166 
10167 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10168  SelectionDAG &DAG) const {
10169  SDLoc dl(Op);
10170  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10171  SDValue StoreChain = SN->getChain();
10172  SDValue BasePtr = SN->getBasePtr();
10173  SDValue Value = SN->getValue();
10174  EVT StoreVT = Value.getValueType();
10175 
10176  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10177  return Op;
10178 
10179  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10180  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10181  // underlying registers individually.
10182  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10183  "Type unsupported without MMA");
10184  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10185  "Type unsupported without paired vector support");
10186  Align Alignment = SN->getAlign();
10187  SmallVector<SDValue, 4> Stores;
10188  unsigned NumVecs = 2;
10189  if (StoreVT == MVT::v512i1) {
10190  Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10191  NumVecs = 4;
10192  }
10193  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10194  unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10195  SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10196  DAG.getConstant(VecNum, dl, MVT::i64));
10197  SDValue Store =
10198  DAG.getStore(StoreChain, dl, Elt, BasePtr,
10199  SN->getPointerInfo().getWithOffset(Idx * 16),
10200  commonAlignment(Alignment, Idx * 16),
10201  SN->getMemOperand()->getFlags(), SN->getAAInfo());
10202  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10203  DAG.getConstant(16, dl, BasePtr.getValueType()));
10204  Stores.push_back(Store);
10205  }
10206  SDValue TF = DAG.getTokenFactor(dl, Stores);
10207  return TF;
10208 }
10209 
10210 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10211  SDLoc dl(Op);
10212  if (Op.getValueType() == MVT::v4i32) {
10213  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10214 
10215  SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10216  // +16 as shift amt.
10217  SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10218  SDValue RHSSwap = // = vrlw RHS, 16
10219  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10220 
10221  // Shrinkify inputs to v8i16.
10222  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10223  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10224  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10225 
10226  // Low parts multiplied together, generating 32-bit results (we ignore the
10227  // top parts).
10228  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10229  LHS, RHS, DAG, dl, MVT::v4i32);
10230 
10231  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10232  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10233  // Shift the high parts up 16 bits.
10234  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10235  Neg16, DAG, dl);
10236  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10237  } else if (Op.getValueType() == MVT::v16i8) {
10238  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10239  bool isLittleEndian = Subtarget.isLittleEndian();
10240 
10241  // Multiply the even 8-bit parts, producing 16-bit sums.
10242  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10243  LHS, RHS, DAG, dl, MVT::v8i16);
10244  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10245 
10246  // Multiply the odd 8-bit parts, producing 16-bit sums.
10247  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10248  LHS, RHS, DAG, dl, MVT::v8i16);
10249  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10250 
10251  // Merge the results together. Because vmuleub and vmuloub are
10252  // instructions with a big-endian bias, we must reverse the
10253  // element numbering and reverse the meaning of "odd" and "even"
10254  // when generating little endian code.
10255  int Ops[16];
10256  for (unsigned i = 0; i != 8; ++i) {
10257  if (isLittleEndian) {
10258  Ops[i*2 ] = 2*i;
10259  Ops[i*2+1] = 2*i+16;
10260  } else {
10261  Ops[i*2 ] = 2*i+1;
10262  Ops[i*2+1] = 2*i+1+16;
10263  }
10264  }
10265  if (isLittleEndian)
10266  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10267  else
10268  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10269  } else {
10270  llvm_unreachable("Unknown mul to lower!");
10271  }
10272 }
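// Scalar model of the v4i32 path above (an illustrative sketch; the helper
// name is hypothetical): each 32-bit lane of the product is rebuilt from
// 16-bit halves exactly as vrlw/vmulouh/vmsumuhm/vslw combine them.
static uint32_t mulV4I32LaneModel(uint32_t A, uint32_t B) {
  uint32_t BSwap = (B << 16) | (B >> 16);       // vrlw RHS, 16
  uint32_t Lo = (A & 0xFFFF) * (B & 0xFFFF);    // vmulouh: lo(A)*lo(B)
  // vmsumuhm(LHS, RHSSwap, 0) sums lo(A)*hi(B) + hi(A)*lo(B) per word lane.
  uint32_t Hi = (A & 0xFFFF) * (BSwap & 0xFFFF) + (A >> 16) * (BSwap >> 16);
  return Lo + (Hi << 16);                       // vslw by 16, then add
}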
10273 
10274 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10275  bool IsStrict = Op->isStrictFPOpcode();
10276  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10277  !Subtarget.hasP9Vector())
10278  return SDValue();
10279 
10280  return Op;
10281 }
10282 
10283 // Custom lowering for fpext v2f32 to v2f64
10284 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10285 
10286  assert(Op.getOpcode() == ISD::FP_EXTEND &&
10287  "Should only be called for ISD::FP_EXTEND");
10288 
10289  // FIXME: handle extends from half precision float vectors on P9.
10290  // We only want to custom lower an extend from v2f32 to v2f64.
10291  if (Op.getValueType() != MVT::v2f64 ||
10292  Op.getOperand(0).getValueType() != MVT::v2f32)
10293  return SDValue();
10294 
10295  SDLoc dl(Op);
10296  SDValue Op0 = Op.getOperand(0);
10297 
10298  switch (Op0.getOpcode()) {
10299  default:
10300  return SDValue();
10301  case ISD::EXTRACT_SUBVECTOR: {
10302  assert(Op0.getNumOperands() == 2 &&
10303  isa<ConstantSDNode>(Op0->getOperand(1)) &&
10304  "Node should have 2 operands with second one being a constant!");
10305 
10306  if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10307  return SDValue();
10308 
10309  // Custom lower is only done for high or low doubleword.
10310  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10311  if (Idx % 2 != 0)
10312  return SDValue();
10313 
10314  // Since input is v4f32, at this point Idx is either 0 or 2.
10315  // Shift to get the doubleword position we want.
10316  int DWord = Idx >> 1;
10317 
10318  // High and low word positions are different on little endian.
10319  if (Subtarget.isLittleEndian())
10320  DWord ^= 0x1;
10321 
10322  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10323  Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10324  }
10325  case ISD::FADD:
10326  case ISD::FMUL:
10327  case ISD::FSUB: {
10328  SDValue NewLoad[2];
10329  for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10330  // Ensure both inputs are loads.
10331  SDValue LdOp = Op0.getOperand(i);
10332  if (LdOp.getOpcode() != ISD::LOAD)
10333  return SDValue();
10334  // Generate new load node.
10335  LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10336  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10337  NewLoad[i] = DAG.getMemIntrinsicNode(
10338  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10339  LD->getMemoryVT(), LD->getMemOperand());
10340  }
10341  SDValue NewOp =
10342  DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10343  NewLoad[1], Op0.getNode()->getFlags());
10344  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10345  DAG.getConstant(0, dl, MVT::i32));
10346  }
10347  case ISD::LOAD: {
10348  LoadSDNode *LD = cast<LoadSDNode>(Op0);
10349  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10350  SDValue NewLd = DAG.getMemIntrinsicNode(
10351  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10352  LD->getMemoryVT(), LD->getMemOperand());
10353  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10354  DAG.getConstant(0, dl, MVT::i32));
10355  }
10356  }
10357  llvm_unreachable("ERROR:Should return for all cases within swtich.");
10358 }
10359 
10360 /// LowerOperation - Provide custom lowering hooks for some operations.
10361 ///
10362 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10363  switch (Op.getOpcode()) {
10364  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10365  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10366  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10367  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10368  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10369  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
10370  case ISD::SETCC: return LowerSETCC(Op, DAG);
10371  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
10372  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
10373 
10374  case ISD::INLINEASM:
10375  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
10376  // Variable argument lowering.
10377  case ISD::VASTART: return LowerVASTART(Op, DAG);
10378  case ISD::VAARG: return LowerVAARG(Op, DAG);
10379  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
10380 
10381  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
10382  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10383  case ISD::GET_DYNAMIC_AREA_OFFSET:
10384  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10385 
10386  // Exception handling lowering.
10387  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
10388  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
10389  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
10390 
10391  case ISD::LOAD: return LowerLOAD(Op, DAG);
10392  case ISD::STORE: return LowerSTORE(Op, DAG);
10393  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
10394  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10395  case ISD::STRICT_FP_TO_UINT:
10396  case ISD::STRICT_FP_TO_SINT:
10397  case ISD::FP_TO_UINT:
10398  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10399  case ISD::STRICT_UINT_TO_FP:
10400  case ISD::STRICT_SINT_TO_FP:
10401  case ISD::UINT_TO_FP:
10402  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10403  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10404 
10405  // Lower 64-bit shifts.
10406  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
10407  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
10408  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
10409 
10410  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
10411  case ISD::FSHR: return LowerFunnelShift(Op, DAG);
10412 
10413  // Vector-related lowering.
10414  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
10415  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
10416  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10417  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
10418  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10419  case ISD::MUL: return LowerMUL(Op, DAG);
10420  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10421  case ISD::STRICT_FP_ROUND:
10422  case ISD::FP_ROUND:
10423  return LowerFP_ROUND(Op, DAG);
10424  case ISD::ROTL: return LowerROTL(Op, DAG);
10425 
10426  // For counter-based loop handling.
10427  case ISD::INTRINSIC_W_CHAIN: return SDValue();
10428 
10429  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
10430 
10431  // Frame & Return address.
10432  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10433  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10434 
10435  case ISD::INTRINSIC_VOID:
10436  return LowerINTRINSIC_VOID(Op, DAG);
10437  case ISD::BSWAP:
10438  return LowerBSWAP(Op, DAG);
10439  case ISD::ATOMIC_CMP_SWAP:
10440  return LowerATOMIC_CMP_SWAP(Op, DAG);
10441  }
10442 }
10443 
10444 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10445  SmallVectorImpl<SDValue> &Results,
10446  SelectionDAG &DAG) const {
10447  SDLoc dl(N);
10448  switch (N->getOpcode()) {
10449  default:
10450  llvm_unreachable("Do not know how to custom type legalize this operation!");
10451  case ISD::READCYCLECOUNTER: {
10452  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10453  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10454 
10455  Results.push_back(
10456  DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10457  Results.push_back(RTB.getValue(2));
10458  break;
10459  }
10460  case ISD::INTRINSIC_W_CHAIN: {
10461  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10462  Intrinsic::loop_decrement)
10463  break;
10464 
10465  assert(N->getValueType(0) == MVT::i1 &&
10466  "Unexpected result type for CTR decrement intrinsic");
10467  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10468  N->getValueType(0));
10469  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10470  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10471  N->getOperand(1));
10472 
10473  Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10474  Results.push_back(NewInt.getValue(1));
10475  break;
10476  }
10477  case ISD::VAARG: {
10478  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10479  return;
10480 
10481  EVT VT = N->getValueType(0);
10482 
10483  if (VT == MVT::i64) {
10484  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10485 
10486  Results.push_back(NewNode);
10487  Results.push_back(NewNode.getValue(1));
10488  }
10489  return;
10490  }
10491  case ISD::STRICT_FP_TO_SINT:
10492  case ISD::STRICT_FP_TO_UINT:
10493  case ISD::FP_TO_SINT:
10494  case ISD::FP_TO_UINT:
10495  // LowerFP_TO_INT() can only handle f32 and f64.
10496  if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10497  MVT::ppcf128)
10498  return;
10499  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10500  return;
10501  case ISD::TRUNCATE: {
10502  if (!N->getValueType(0).isVector())
10503  return;
10504  SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10505  if (Lowered)
10506  Results.push_back(Lowered);
10507  return;
10508  }
10509  case ISD::FSHL:
10510  case ISD::FSHR:
10511  // Don't handle funnel shifts here.
10512  return;
10513  case ISD::BITCAST:
10514  // Don't handle bitcast here.
10515  return;
10516  case ISD::FP_EXTEND:
10517  SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10518  if (Lowered)
10519  Results.push_back(Lowered);
10520  return;
10521  }
10522 }
10523 
10524 //===----------------------------------------------------------------------===//
10525 // Other Lowering Code
10526 //===----------------------------------------------------------------------===//
10527 
10528 static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10529  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10530  Function *Func = Intrinsic::getDeclaration(M, Id);
10531  return Builder.CreateCall(Func, {});
10532 }
10533 
10534 // The mappings for emitLeadingFence/emitTrailingFence are taken from
10535 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10536 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10537  Instruction *Inst,
10538  AtomicOrdering Ord) const {
10539  if (Ord == AtomicOrdering::SequentiallyConsistent)
10540  return callIntrinsic(Builder, Intrinsic::ppc_sync);
10541  if (isReleaseOrStronger(Ord))
10542  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10543  return nullptr;
10544 }
10545 
10546 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10547  Instruction *Inst,
10548  AtomicOrdering Ord) const {
10549  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10550  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10551  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10552  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10553  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10554  return Builder.CreateCall(
10555  Intrinsic::getDeclaration(
10556  Builder.GetInsertBlock()->getParent()->getParent(),
10557  Intrinsic::ppc_cfence, {Inst->getType()}),
10558  {Inst});
10559  // FIXME: Can use isync for rmw operation.
10560  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10561  }
10562  return nullptr;
10563 }
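// Illustrative summary (a sketch, not used by the lowering) of the barriers
// the two hooks above select under the cited mapping: a leading hwsync for
// seq_cst, a leading lwsync for release, and a trailing lwsync (or cfence
// after 64-bit atomic loads) for acquire.
static const char *leadingFenceMnemonic(AtomicOrdering Ord) {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return "sync";   // full barrier, as emitLeadingFence emits ppc_sync
  if (isReleaseOrStronger(Ord))
    return "lwsync"; // lightweight barrier suffices for release
  return nullptr;    // weaker orderings need no leading fence
}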
10564 
10565 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
10566  MachineBasicBlock *BB,
10567  unsigned AtomicSize,
10568  unsigned BinOpcode,
10569  unsigned CmpOpcode,
10570  unsigned CmpPred) const {
10571  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10572  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10573 
10574  auto LoadMnemonic = PPC::LDARX;
10575  auto StoreMnemonic = PPC::STDCX;
10576  switch (AtomicSize) {
10577  default:
10578  llvm_unreachable("Unexpected size of atomic entity");
10579  case 1:
10580  LoadMnemonic = PPC::LBARX;
10581  StoreMnemonic = PPC::STBCX;
10582  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10583  break;
10584  case 2:
10585  LoadMnemonic = PPC::LHARX;
10586  StoreMnemonic = PPC::STHCX;
10587  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10588  break;
10589  case 4:
10590  LoadMnemonic = PPC::LWARX;
10591  StoreMnemonic = PPC::STWCX;
10592  break;
10593  case 8:
10594  LoadMnemonic = PPC::LDARX;
10595  StoreMnemonic = PPC::STDCX;
10596  break;
10597  }
10598 
10599  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10600  MachineFunction *F = BB->getParent();
10601  MachineFunction::iterator It = ++BB->getIterator();
10602 
10603  Register dest = MI.getOperand(0).getReg();
10604  Register ptrA = MI.getOperand(1).getReg();
10605  Register ptrB = MI.getOperand(2).getReg();
10606  Register incr = MI.getOperand(3).getReg();
10607  DebugLoc dl = MI.getDebugLoc();
10608 
10609  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10610  MachineBasicBlock *loop2MBB =
10611  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10612  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10613  F->insert(It, loopMBB);
10614  if (CmpOpcode)
10615  F->insert(It, loop2MBB);
10616  F->insert(It, exitMBB);
10617  exitMBB->splice(exitMBB->begin(), BB,
10618  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10619  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10620 
10621  MachineRegisterInfo &RegInfo = F->getRegInfo();
10622  Register TmpReg = (!BinOpcode) ? incr :
10623  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10624  : &PPC::GPRCRegClass);
10625 
10626  // thisMBB:
10627  // ...
10628  // fallthrough --> loopMBB
10629  BB->addSuccessor(loopMBB);
10630 
10631  // loopMBB:
10632  // l[wd]arx dest, ptr
10633  // add r0, dest, incr
10634  // st[wd]cx. r0, ptr
10635  // bne- loopMBB
10636  // fallthrough --> exitMBB
10637 
10638  // For max/min...
10639  // loopMBB:
10640  // l[wd]arx dest, ptr
10641  // cmpl?[wd] incr, dest
10642  // bgt exitMBB
10643  // loop2MBB:
10644  // st[wd]cx. dest, ptr
10645  // bne- loopMBB
10646  // fallthrough --> exitMBB
10647 
10648  BB = loopMBB;
10649  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10650  .addReg(ptrA).addReg(ptrB);
10651  if (BinOpcode)
10652  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10653  if (CmpOpcode) {
10654  // Signed comparisons of byte or halfword values must be sign-extended.
10655  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10656  Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10657  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10658  ExtReg).addReg(dest);
10659  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10660  .addReg(incr).addReg(ExtReg);
10661  } else
10662  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10663  .addReg(incr).addReg(dest);
10664 
10665  BuildMI(BB, dl, TII->get(PPC::BCC))
10666  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10667  BB->addSuccessor(loop2MBB);
10668  BB->addSuccessor(exitMBB);
10669  BB = loop2MBB;
10670  }
10671  BuildMI(BB, dl, TII->get(StoreMnemonic))
10672  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10673  BuildMI(BB, dl, TII->get(PPC::BCC))
10674  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10675  BB->addSuccessor(loopMBB);
10676  BB->addSuccessor(exitMBB);
10677 
10678  // exitMBB:
10679  // ...
10680  BB = exitMBB;
10681  return BB;
10682 }
10683 
10684 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
10685  switch(MI.getOpcode()) {
10686  default:
10687  return false;
10688  case PPC::COPY:
10689  return TII->isSignExtended(MI);
10690  case PPC::LHA:
10691  case PPC::LHA8:
10692  case PPC::LHAU:
10693  case PPC::LHAU8:
10694  case PPC::LHAUX:
10695  case PPC::LHAUX8:
10696  case PPC::LHAX:
10697  case PPC::LHAX8:
10698  case PPC::LWA:
10699  case PPC::LWAUX:
10700  case PPC::LWAX:
10701  case PPC::LWAX_32:
10702  case PPC::LWA_32:
10703  case PPC::PLHA:
10704  case PPC::PLHA8:
10705  case PPC::PLHA8pc:
10706  case PPC::PLHApc:
10707  case PPC::PLWA:
10708  case PPC::PLWA8:
10709  case PPC::PLWA8pc:
10710  case PPC::PLWApc:
10711  case PPC::EXTSB:
10712  case PPC::EXTSB8:
10713  case PPC::EXTSB8_32_64:
10714  case PPC::EXTSB8_rec:
10715  case PPC::EXTSB_rec:
10716  case PPC::EXTSH:
10717  case PPC::EXTSH8:
10718  case PPC::EXTSH8_32_64:
10719  case PPC::EXTSH8_rec:
10720  case PPC::EXTSH_rec:
10721  case PPC::EXTSW:
10722  case PPC::EXTSWSLI:
10723  case PPC::EXTSWSLI_32_64:
10724  case PPC::EXTSWSLI_32_64_rec:
10725  case PPC::EXTSWSLI_rec:
10726  case PPC::EXTSW_32:
10727  case PPC::EXTSW_32_64:
10728  case PPC::EXTSW_32_64_rec:
10729  case PPC::EXTSW_rec:
10730  case PPC::SRAW:
10731  case PPC::SRAWI:
10732  case PPC::SRAWI_rec:
10733  case PPC::SRAW_rec:
10734  return true;
10735  }
10736  return false;
10737 }
10738 
10739 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10740  MachineInstr &MI, MachineBasicBlock *BB,
10741  bool is8bit, // operation
10742  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10743  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10744  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10745 
10746  // If this is a signed comparison and the value being compared is not known
10747  // to be sign extended, sign extend it here.
10748  DebugLoc dl = MI.getDebugLoc();
10749  MachineFunction *F = BB->getParent();
10750  MachineRegisterInfo &RegInfo = F->getRegInfo();
10751  Register incr = MI.getOperand(3).getReg();
10752  bool IsSignExtended = Register::isVirtualRegister(incr) &&
10753  isSignExtended(*RegInfo.getVRegDef(incr), TII);
10754 
10755  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
10756  Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10757  BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
10758  .addReg(MI.getOperand(3).getReg());
10759  MI.getOperand(3).setReg(ValueReg);
10760  }
10761  // If we support part-word atomic mnemonics, just use them.
10762  if (Subtarget.hasPartwordAtomics())
10763  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10764  CmpPred);
10765 
10766  // In 64 bit mode we have to use 64 bits for addresses, even though the
10767  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
10768  // registers without caring whether they're 32 or 64, but here we're
10769  // doing actual arithmetic on the addresses.
10770  bool is64bit = Subtarget.isPPC64();
10771  bool isLittleEndian = Subtarget.isLittleEndian();
10772  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10773 
10774  const BasicBlock *LLVM_BB = BB->getBasicBlock();
10775  MachineFunction::iterator It = ++BB->getIterator();
10776 
10777  Register dest = MI.getOperand(0).getReg();
10778  Register ptrA = MI.getOperand(1).getReg();
10779  Register ptrB = MI.getOperand(2).getReg();
10780 
10781  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10782  MachineBasicBlock *loop2MBB =
10783  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10784  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10785  F->insert(It, loopMBB);
10786  if (CmpOpcode)
10787  F->insert(It, loop2MBB);
10788  F->insert(It, exitMBB);
10789  exitMBB->splice(exitMBB->begin(), BB,
10790  std::next(MachineBasicBlock::iterator(MI)), BB->end());
10791  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10792 
10793  const TargetRegisterClass *RC =
10794  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10795  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10796 
10797  Register PtrReg = RegInfo.createVirtualRegister(RC);
10798  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10799  Register ShiftReg =
10800  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10801  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10802  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10803  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10804  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10805  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10806  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10807  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10808  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10809  Register Ptr1Reg;
10810  Register TmpReg =
10811  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10812 
10813  // thisMBB:
10814  // ...
10815  // fallthrough --> loopMBB
10816  BB->addSuccessor(loopMBB);
10817 
10818  // The 4-byte load must be aligned, while a char or short may be
10819  // anywhere in the word. Hence all this nasty bookkeeping code.
10820  // add ptr1, ptrA, ptrB [copy if ptrA==0]
10821  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10822  // xori shift, shift1, 24 [16]
10823  // rlwinm ptr, ptr1, 0, 0, 29
10824  // slw incr2, incr, shift
10825  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10826  // slw mask, mask2, shift
10827  // loopMBB:
10828  // lwarx tmpDest, ptr
10829  // add tmp, tmpDest, incr2
10830  // andc tmp2, tmpDest, mask
10831  // and tmp3, tmp, mask
10832  // or tmp4, tmp3, tmp2
10833  // stwcx. tmp4, ptr
10834  // bne- loopMBB
10835  // fallthrough --> exitMBB
10836  // srw dest, tmpDest, shift
10837  if (ptrA != ZeroReg) {
10838  Ptr1Reg = RegInfo.createVirtualRegister(RC);
10839  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10840  .addReg(ptrA)
10841  .addReg(ptrB);
10842  } else {
10843  Ptr1Reg = ptrB;
10844  }
10845  // We need to use a 32-bit subregister here to avoid a register class
10846  // mismatch in 64-bit mode.
10847  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10848  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10849  .addImm(3)
10850  .addImm(27)
10851  .addImm(is8bit ? 28 : 27);
10852  if (!isLittleEndian)
10853  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10854  .addReg(Shift1Reg)
10855  .addImm(is8bit ? 24 : 16);
10856  if (is64bit)
10857  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10858  .addReg(Ptr1Reg)
10859  .addImm(0)
10860  .addImm(61);
10861  else
10862  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10863  .addReg(Ptr1Reg)
10864  .addImm(0)
10865  .addImm(0)
10866  .addImm(29);
10867  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10868  if (is8bit)
10869  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10870  else {
10871  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10872  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10873  .addReg(Mask3Reg)
10874  .addImm(65535);
10875  }
10876  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10877  .addReg(Mask2Reg)
10878  .addReg(ShiftReg);
10879 
10880  BB = loopMBB;
10881  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10882  .addReg(ZeroReg)
10883  .addReg(PtrReg);
10884  if (BinOpcode)
10885  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10886  .addReg(Incr2Reg)
10887  .addReg(TmpDestReg);
10888  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10889  .addReg(TmpDestReg)
10890  .addReg(MaskReg);
10891  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10892  if (CmpOpcode) {
10893  // For unsigned comparisons, we can directly compare the shifted values.
10894  // For signed comparisons we shift and sign extend.
10895  Register SReg = RegInfo.createVirtualRegister(GPRC);
10896  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10897  .addReg(TmpDestReg)
10898  .addReg(MaskReg);
10899  unsigned ValueReg = SReg;
10900  unsigned CmpReg = Incr2Reg;
10901  if (CmpOpcode == PPC::CMPW) {
10902  ValueReg = RegInfo.createVirtualRegister(GPRC);
10903  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10904  .addReg(SReg)
10905  .addReg(ShiftReg);
10906  Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10907  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10908  .addReg(ValueReg);
10909  ValueReg = ValueSReg;
10910  CmpReg = incr;
10911  }
10912  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10913  .addReg(CmpReg)
10914  .addReg(ValueReg);
10915  BuildMI(BB, dl, TII->get(PPC::BCC))
10916  .addImm(CmpPred)
10917  .addReg(PPC::CR0)
10918  .addMBB(exitMBB);
10919  BB->addSuccessor(loop2MBB);
10920  BB->addSuccessor(exitMBB);
10921  BB = loop2MBB;
10922  }
10923  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10924  BuildMI(BB, dl, TII->get(PPC::STWCX))
10925  .addReg(Tmp4Reg)
10926  .addReg(ZeroReg)
10927  .addReg(PtrReg);
10928  BuildMI(BB, dl, TII->get(PPC::BCC))
10929  .addImm(PPC::PRED_NE)
10930  .addReg(PPC::CR0)
10931  .addMBB(loopMBB);
10932  BB->addSuccessor(loopMBB);
10933  BB->addSuccessor(exitMBB);
10934 
10935  // exitMBB:
10936  // ...
10937  BB = exitMBB;
10938  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10939  .addReg(TmpDestReg)
10940  .addReg(ShiftReg);
10941  return BB;
10942 }
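// Lane-shift model for the rlwinm/xori pair above (hypothetical helper):
// little endian shifts by the byte offset within the aligned word; big
// endian mirrors it by xoring with 24 (bytes) or 16 (halfwords).
static unsigned partwordShiftAmount(uint64_t Addr, bool Is8Bit,
                                    bool IsLittleEndian) {
  unsigned ByteOff = Addr & 3;                   // offset inside the word
  unsigned Shift = Is8Bit ? ByteOff * 8          // rlwinm ., 3, 27, 28
                          : (ByteOff & ~1u) * 8; // rlwinm ., 3, 27, 27
  if (!IsLittleEndian)
    Shift ^= Is8Bit ? 24 : 16;                   // xori shift, shift1, 24 [16]
  return Shift;
}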
10943 
10944 MachineBasicBlock *
10945 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10946  MachineBasicBlock *MBB) const {
10947  DebugLoc DL = MI.getDebugLoc();
10948  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10949  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10950 
10951  MachineFunction *MF = MBB->getParent();
10952  MachineRegisterInfo &MRI = MF->getRegInfo();
10953 
10954  const BasicBlock *BB = MBB->getBasicBlock();
10955  MachineFunction::iterator I = ++MBB->getIterator();
10956 
10957  Register DstReg = MI.getOperand(0).getReg();
10958  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10959  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10960  Register mainDstReg = MRI.createVirtualRegister(RC);
10961  Register restoreDstReg = MRI.createVirtualRegister(RC);
10962 
10963  MVT PVT = getPointerTy(MF->getDataLayout());
10964  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10965  "Invalid Pointer Size!");
10966  // For v = setjmp(buf), we generate
10967  //
10968  // thisMBB:
10969  // SjLjSetup mainMBB
10970  // bl mainMBB
10971  // v_restore = 1
10972  // b sinkMBB
10973  //
10974  // mainMBB:
10975  // buf[LabelOffset] = LR
10976  // v_main = 0
10977  //
10978  // sinkMBB:
10979  // v = phi(main, restore)
10980  //
10981 
10982  MachineBasicBlock *thisMBB = MBB;
10983  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10984  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10985  MF->insert(I, mainMBB);
10986  MF->insert(I, sinkMBB);
10987 
10988  MachineInstrBuilder MIB;
10989 
10990  // Transfer the remainder of BB and its successor edges to sinkMBB.
10991  sinkMBB->splice(sinkMBB->begin(), MBB,
10992  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10993  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10994 
10995  // Note that the structure of the jmp_buf used here is not compatible
10996  // with that used by libc, and is not designed to be. Specifically, it
10997  // stores only those 'reserved' registers that LLVM does not otherwise
10998  // understand how to spill. Also, by convention, by the time this
10999  // intrinsic is called, Clang has already stored the frame address in the
11000  // first slot of the buffer and stack address in the third. Following the
11001  // X86 target code, we'll store the jump address in the second slot. We also
11002  // need to save the TOC pointer (R2) to handle jumps between shared
11003  // libraries, and that will be stored in the fourth slot. The thread
11004  // identifier (R13) is not affected.
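  // For reference, the pointer-sized slot layout implied by the comment
  // above:
  //   buf[0]: frame address     (stored by Clang)
  //   buf[1]: jump address / LR (LabelOffset, stored in mainMBB below)
  //   buf[2]: stack address     (stored by Clang)
  //   buf[3]: TOC pointer (R2)  (TOCOffset, 64-bit ELF only)
  //   buf[4]: base pointer      (BPOffset)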
11005 
11006  // thisMBB:
11007  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11008  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11009  const int64_t BPOffset = 4 * PVT.getStoreSize();
11010 
11011  // Prepare the IP in a register.
11012  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11013  Register LabelReg = MRI.createVirtualRegister(PtrRC);
11014  Register BufReg = MI.getOperand(1).getReg();
11015 
11016  if (Subtarget.is64BitELFABI()) {
11017  setUsesTOCBasePtr(*MBB->getParent());
11018  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11019  .addReg(PPC::X2)
11020  .addImm(TOCOffset)
11021  .addReg(BufReg)
11022  .cloneMemRefs(MI);
11023  }
11024 
11025  // Naked functions never have a base pointer, and so we use r1. For all
11026  // other functions, this decision is deferred until PEI.
11027  unsigned BaseReg;
11028  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11029  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11030  else
11031  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11032 
11033  MIB = BuildMI(*thisMBB, MI, DL,
11034  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11035  .addReg(BaseReg)
11036  .addImm(BPOffset)
11037  .addReg(BufReg)
11038  .cloneMemRefs(MI);
11039 
11040  // Setup
11041  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11042  MIB.addRegMask(TRI->getNoPreservedMask());
11043 
11044  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11045 
11046  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11047  .addMBB(mainMBB);
11048  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11049 
11050  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11051  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11052 
11053  // mainMBB:
11054  // mainDstReg = 0
11055  MIB =
11056  BuildMI(mainMBB, DL,
11057  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11058 
11059  // Store IP
11060  if (Subtarget.isPPC64()) {
11061  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11062  .addReg(LabelReg)
11063  .addImm(LabelOffset)
11064  .addReg(BufReg);
11065  } else {
11066  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11067  .addReg(LabelReg)
11068  .addImm(LabelOffset)
11069  .addReg(BufReg);
11070  }
11071  MIB.cloneMemRefs(MI);
11072 
11073  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11074  mainMBB->addSuccessor(sinkMBB);
11075 
11076  // sinkMBB:
11077  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11078  TII->get(PPC::PHI), DstReg)
11079  .addReg(mainDstReg).addMBB(mainMBB)
11080  .addReg(restoreDstReg).addMBB(thisMBB);
11081 
11082  MI.eraseFromParent();
11083  return sinkMBB;
11084 }
11085 
11086 MachineBasicBlock *
11087 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11088  MachineBasicBlock *MBB) const {
11089  DebugLoc DL = MI.getDebugLoc();
11090  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11091 
11092  MachineFunction *MF = MBB->getParent();
11093  MachineRegisterInfo &MRI = MF->getRegInfo();
11094 
11095  MVT PVT = getPointerTy(MF->getDataLayout());
11096  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11097  "Invalid Pointer Size!");
11098 
11099  const TargetRegisterClass *RC =
11100  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11101  Register Tmp = MRI.createVirtualRegister(RC);
11102  // Since FP is only updated here but NOT referenced, it's treated as GPR.
11103  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11104  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11105  unsigned BP =
11106  (PVT == MVT::i64)
11107  ? PPC::X30
11108  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11109  : PPC::R30);
11110 
11111  MachineInstrBuilder MIB;
11112 
11113  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11114  const int64_t SPOffset = 2 * PVT.getStoreSize();
11115  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11116  const int64_t BPOffset = 4 * PVT.getStoreSize();
11117 
11118  Register BufReg = MI.getOperand(0).getReg();
11119 
11120  // Reload FP (the jumped-to function may not have had a
11121  // frame pointer, in which case its r31 will be restored
11122  // as necessary).
11123  if (PVT == MVT::i64) {
11124  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11125  .addImm(0)
11126  .addReg(BufReg);
11127  } else {
11128  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11129  .addImm(0)
11130  .addReg(BufReg);
11131  }
11132  MIB.cloneMemRefs(MI);
11133 
11134  // Reload IP
11135  if (PVT == MVT::i64) {
11136  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11137  .addImm(LabelOffset)
11138  .addReg(BufReg);
11139  } else {
11140  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11141  .addImm(LabelOffset)
11142  .addReg(BufReg);
11143  }
11144  MIB.cloneMemRefs(MI);
11145 
11146  // Reload SP
11147  if (PVT == MVT::i64) {
11148  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11149  .addImm(SPOffset)
11150  .addReg(BufReg);
11151  } else {
11152  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11153  .addImm(SPOffset)
11154  .addReg(BufReg);
11155  }
11156  MIB.cloneMemRefs(MI);
11157 
11158  // Reload BP
11159  if (PVT == MVT::i64) {
11160  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11161  .addImm(BPOffset)
11162  .addReg(BufReg);
11163  } else {
11164  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11165  .addImm(BPOffset)
11166  .addReg(BufReg);
11167  }
11168  MIB.cloneMemRefs(MI);
11169 
11170  // Reload TOC
11171  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11172  setUsesTOCBasePtr(*MBB->getParent());
11173  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11174  .addImm(TOCOffset)
11175  .addReg(BufReg)
11176  .cloneMemRefs(MI);
11177  }
11178 
11179  // Jump
11180  BuildMI(*MBB, MI, DL,
11181  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11182  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11183 
11184  MI.eraseFromParent();
11185  return MBB;
11186 }
11187 
11188 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11189  // If the function specifically requests inline stack probes, emit them.
11190  if (MF.getFunction().hasFnAttribute("probe-stack"))
11191  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11192  "inline-asm";
11193  return false;
11194 }
11195 
11196 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11197  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11198  unsigned StackAlign = TFI->getStackAlignment();
11199  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11200  "Unexpected stack alignment");
11201  // The default stack probe size is 4096 if the function has no
11202  // stack-probe-size attribute.
11203  unsigned StackProbeSize = 4096;
11204  const Function &Fn = MF.getFunction();
11205  if (Fn.hasFnAttribute("stack-probe-size"))
11206  Fn.getFnAttribute("stack-probe-size")
11207  .getValueAsString()
11208  .getAsInteger(0, StackProbeSize);
11209  // Round down to the stack alignment.
11210  StackProbeSize &= ~(StackAlign - 1);
11211  return StackProbeSize ? StackProbeSize : StackAlign;
11212 }
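// A worked example of the rounding above: with a 16-byte stack alignment
// and a "stack-probe-size" attribute of 1000, 1000 & ~15 == 992, so probes
// are emitted every 992 bytes; a requested size smaller than the alignment
// rounds down to zero, which falls back to StackAlign itself.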
11213 
11214 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11215 // into three phases. In the first phase, it uses the pseudo instruction
11216 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
11217 // and FinalStackPtr. In the second phase, it generates a loop for probing
11218 // blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
11219 // future result of MaxCallFrameSize, to calculate the correct data area pointer.
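// As a concrete sketch of the residual math used below, assume
// ProbeSize = 4096 and a 10000-byte allocation (ActualNegSize = -10000):
// Div = -10000 / -4096 = 2, Mul = 2 * -4096 = -8192, and
// NegMod = -10000 - (-8192) = -1808. The leading stdux/stwux probes the
// 1808-byte residual, and the loop then probes the two remaining 4096-byte
// blocks until SP reaches FinalStackPtr.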
11220 MachineBasicBlock *
11221 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11222  MachineBasicBlock *MBB) const {
11223  const bool isPPC64 = Subtarget.isPPC64();
11224  MachineFunction *MF = MBB->getParent();
11225  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11226  DebugLoc DL = MI.getDebugLoc();
11227  const unsigned ProbeSize = getStackProbeSize(*MF);
11228  const BasicBlock *ProbedBB = MBB->getBasicBlock();
11229  MachineRegisterInfo &MRI = MF->getRegInfo();
11230  // The CFG of the probing code looks like:
11231  // +-----+
11232  // | MBB |
11233  // +--+--+
11234  // |
11235  // +----v----+
11236  // +--->+ TestMBB +---+
11237  // | +----+----+ |
11238  // | | |
11239  // | +-----v----+ |
11240  // +---+ BlockMBB | |
11241  // +----------+ |
11242  // |
11243  // +---------+ |
11244  // | TailMBB +<--+
11245  // +---------+
11246  // In MBB, calculate previous frame pointer and final stack pointer.
11247  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11248  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11249  // TailMBB is spliced via \p MI.
11250  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11251  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11252  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11253 
11254  MachineFunction::iterator MBBIter = ++MBB->getIterator();
11255  MF->insert(MBBIter, TestMBB);
11256  MF->insert(MBBIter, BlockMBB);
11257  MF->insert(MBBIter, TailMBB);
11258 
11259  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11260  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11261 
11262  Register DstReg = MI.getOperand(0).getReg();
11263  Register NegSizeReg = MI.getOperand(1).getReg();
11264  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11265  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11266  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11267  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11268 
11269  // Since the value of NegSizeReg might be realigned during prolog/epilog
11270  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11271  // actual FramePointer and NegSize.
11272  unsigned ProbeOpc;
11273  if (!MRI.hasOneNonDBGUse(NegSizeReg))
11274  ProbeOpc =
11275  isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11276  else
11277  // When NegSizeReg has only one use (the current MI, which will itself be
11278  // replaced by PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variants
11279  // so that ActualNegSizeReg and NegSizeReg are allocated to the same
11280  // physical register, avoiding a redundant copy.
11281  ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11282  : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11283  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11284  .addDef(ActualNegSizeReg)
11285  .addReg(NegSizeReg)
11286  .add(MI.getOperand(2))
11287  .add(MI.getOperand(3));
11288 
11289  // Calculate the final stack pointer, which equals SP + ActualNegSize.
11290  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11291  FinalStackPtr)
11292  .addReg(SPReg)
11293  .addReg(ActualNegSizeReg);
11294 
11295  // Materialize a scratch register for update.
11296  int64_t NegProbeSize = -(int64_t)ProbeSize;
11297  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11298  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11299  if (!isInt<16>(NegProbeSize)) {
11300  Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11301  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11302  .addImm(NegProbeSize >> 16);
11303  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11304  ScratchReg)
11305  .addReg(TempReg)
11306  .addImm(NegProbeSize & 0xFFFF);
11307  } else
11308  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11309  .addImm(NegProbeSize);
11310 
11311  {
11312  // Probing leading residual part.
11313  Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11314  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11315  .addReg(ActualNegSizeReg)
11316  .addReg(ScratchReg);
11317  Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11318  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11319  .addReg(Div)
11320  .addReg(ScratchReg);
11321  Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11322  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11323  .addReg(Mul)
11324  .addReg(ActualNegSizeReg);
11325  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11326  .addReg(FramePointer)
11327  .addReg(SPReg)
11328  .addReg(NegMod);
11329  }
11330 
11331  {
11332  // The remaining part should be a multiple of ProbeSize.
11333  Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11334  BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11335  .addReg(SPReg)
11336  .addReg(FinalStackPtr);
11337  BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11338  .addImm(PPC::PRED_EQ)
11339  .addReg(CmpResult)
11340  .addMBB(TailMBB);
11341  TestMBB->addSuccessor(BlockMBB);
11342  TestMBB->addSuccessor(TailMBB);
11343  }
11344 
11345  {
11346  // Touch the block.
11347  // |P...|P...|P...
11348  BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11349  .addReg(FramePointer)
11350  .addReg(SPReg)
11351  .addReg(ScratchReg);
11352  BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11353  BlockMBB->addSuccessor(TestMBB);
11354  }
11355 
11356  // The calculation of MaxCallFrameSize is deferred to prolog/epilog
11357  // insertion, so use the DYNAREAOFFSET pseudo instruction to get its future result.
11358  Register MaxCallFrameSizeReg =
11359  MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11360  BuildMI(TailMBB, DL,
11361  TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11362  MaxCallFrameSizeReg)
11363  .add(MI.getOperand(2))
11364  .add(MI.getOperand(3));
11365  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11366  .addReg(SPReg)
11367  .addReg(MaxCallFrameSizeReg);
11368 
11369  // Splice instructions after MI to TailMBB.
11370  TailMBB->splice(TailMBB->end(), MBB,
11371  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11372  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11373  MBB->addSuccessor(TestMBB);
11374 
11375  // Delete the pseudo instruction.
11376  MI.eraseFromParent();
11377 
11378  ++NumDynamicAllocaProbed;
11379  return TailMBB;
11380 }
11381 
11382 MachineBasicBlock *
11383 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11384  MachineBasicBlock *BB) const {
11385  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11386  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11387  if (Subtarget.is64BitELFABI() &&
11388  MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11389  !Subtarget.isUsingPCRelativeCalls()) {
11390  // Call lowering should have added an r2 operand to indicate a dependence
11391  // on the TOC base pointer value. It can't, however, because there is no
11392  // way to mark the dependence as implicit there, and so the stackmap code
11393  // will confuse it with a regular operand. Instead, add the dependence
11394  // here.
11395  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11396  }
11397 
11398  return emitPatchPoint(MI, BB);
11399  }
11400 
11401  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11402  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11403  return emitEHSjLjSetJmp(MI, BB);
11404  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11405  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11406  return emitEHSjLjLongJmp(MI, BB);
11407  }
11408 
11409  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11410 
11411  // To "insert" these instructions we actually have to insert their
11412  // control-flow patterns.
11413  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11414  MachineFunction::iterator It = ++BB->getIterator();
11415 
11416  MachineFunction *F = BB->getParent();
11417 
11418  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11419  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11420  MI.getOpcode() == PPC::SELECT_I8) {
11421  SmallVector<MachineOperand, 2> Cond;
11422  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11423  MI.getOpcode() == PPC::SELECT_CC_I8)
11424  Cond.push_back(MI.getOperand(4));
11425  else
11426  Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11427  Cond.push_back(MI.getOperand(1));
11428 
11429  DebugLoc dl = MI.getDebugLoc();
11430  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11431  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11432  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11433  MI.getOpcode() == PPC::SELECT_CC_F8 ||
11434  MI.getOpcode() == PPC::SELECT_CC_F16 ||
11435  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11436  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11437  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11438  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11439  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11440  MI.getOpcode() == PPC::SELECT_CC_SPE ||
11441  MI.getOpcode() == PPC::SELECT_F4 ||
11442  MI.getOpcode() == PPC::SELECT_F8 ||
11443  MI.getOpcode() == PPC::SELECT_F16 ||
11444  MI.getOpcode() == PPC::SELECT_SPE ||
11445  MI.getOpcode() == PPC::SELECT_SPE4 ||
11446  MI.getOpcode() == PPC::SELECT_VRRC ||
11447  MI.getOpcode() == PPC::SELECT_VSFRC ||
11448  MI.getOpcode() == PPC::SELECT_VSSRC ||
11449  MI.getOpcode() == PPC::SELECT_VSRC) {
11450  // The incoming instruction knows the destination vreg to set, the
11451  // condition code register to branch on, the true/false values to
11452  // select between, and a branch opcode to use.
11453 
11454  // thisMBB:
11455  // ...
11456  // TrueVal = ...
11457  // cmpTY ccX, r1, r2
11458  // bCC copy1MBB
11459  // fallthrough --> copy0MBB
11460  MachineBasicBlock *thisMBB = BB;
11461  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11462  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11463  DebugLoc dl = MI.getDebugLoc();
11464  F->insert(It, copy0MBB);
11465  F->insert(It, sinkMBB);
11466 
11467  // Transfer the remainder of BB and its successor edges to sinkMBB.
11468  sinkMBB->splice(sinkMBB->begin(), BB,
11469  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11470  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11471 
11472  // Next, add the true and fallthrough blocks as its successors.
11473  BB->addSuccessor(copy0MBB);
11474  BB->addSuccessor(sinkMBB);
11475 
11476  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11477  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11478  MI.getOpcode() == PPC::SELECT_F16 ||
11479  MI.getOpcode() == PPC::SELECT_SPE4 ||
11480  MI.getOpcode() == PPC::SELECT_SPE ||
11481  MI.getOpcode() == PPC::SELECT_VRRC ||
11482  MI.getOpcode() == PPC::SELECT_VSFRC ||
11483  MI.getOpcode() == PPC::SELECT_VSSRC ||
11484  MI.getOpcode() == PPC::SELECT_VSRC) {
11485  BuildMI(BB, dl, TII->get(PPC::BC))
11486  .addReg(MI.getOperand(1).getReg())
11487  .addMBB(sinkMBB);
11488  } else {
11489  unsigned SelectPred = MI.getOperand(4).getImm();
11490  BuildMI(BB, dl, TII->get(PPC::BCC))
11491  .addImm(SelectPred)
11492  .addReg(MI.getOperand(1).getReg())
11493  .addMBB(sinkMBB);
11494  }
11495 
11496  // copy0MBB:
11497  // %FalseValue = ...
11498  // # fallthrough to sinkMBB
11499  BB = copy0MBB;
11500 
11501  // Update machine-CFG edges
11502  BB->addSuccessor(sinkMBB);
11503 
11504  // sinkMBB:
11505  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11506  // ...
11507  BB = sinkMBB;
11508  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11509  .addReg(MI.getOperand(3).getReg())
11510  .addMBB(copy0MBB)
11511  .addReg(MI.getOperand(2).getReg())
11512  .addMBB(thisMBB);
11513  } else if (MI.getOpcode() == PPC::ReadTB) {
11514  // To read the 64-bit time-base register on a 32-bit target, we read the
11515  // two halves. Should the counter have wrapped while it was being read, we
11516  // need to try again.
11517  // ...
11518  // readLoop:
11519  // mfspr Rx,TBU # load from TBU
11520  // mfspr Ry,TB # load from TB
11521  // mfspr Rz,TBU # load from TBU
11522  // cmpw crX,Rx,Rz # check if 'old'='new'
11523  // bne readLoop # branch if they're not equal
11524  // ...
11525 
11526  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11527  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11528  DebugLoc dl = MI.getDebugLoc();
11529  F->insert(It, readMBB);
11530  F->insert(It, sinkMBB);
11531 
11532  // Transfer the remainder of BB and its successor edges to sinkMBB.
11533  sinkMBB->splice(sinkMBB->begin(), BB,
11534  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11535  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11536 
11537  BB->addSuccessor(readMBB);
11538  BB = readMBB;
11539 
11540  MachineRegisterInfo &RegInfo = F->getRegInfo();
11541  Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11542  Register LoReg = MI.getOperand(0).getReg();
11543  Register HiReg = MI.getOperand(1).getReg();
11544 
11545  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11546  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11547  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11548 
11549  Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11550 
11551  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11552  .addReg(HiReg)
11553  .addReg(ReadAgainReg);
11554  BuildMI(BB, dl, TII->get(PPC::BCC))
11555  .addImm(PPC::PRED_NE)
11556  .addReg(CmpReg)
11557  .addMBB(readMBB);
11558 
11559  BB->addSuccessor(readMBB);
11560  BB->addSuccessor(sinkMBB);
11561  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11562  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11563  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11564  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11565  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11566  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11567  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11568  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11569 
11570  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11571  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11572  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11573  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11574  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11575  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11576  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11577  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11578 
11579  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11580  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11581  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11582  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11583  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11584  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11585  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11586  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11587 
11588  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11589  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11590  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11591  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11592  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11593  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11594  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11595  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11596 
11597  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11598  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11599  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11600  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11601  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11602  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11603  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11604  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11605 
11606  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11607  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11608  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11609  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11610  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11611  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11612  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11613  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11614 
11615  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11616  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11617  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11618  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11619  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11620  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11621  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11622  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11623 
11624  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11625  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11626  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11627  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11628  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11629  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11630  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11631  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11632 
11633  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11634  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11635  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11636  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11637  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11638  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11639  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11640  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11641 
11642  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11643  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11644  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11645  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11646  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11647  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11648  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11649  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11650 
11651  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11652  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11653  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11654  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11655  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11656  BB = EmitAtomicBinary(MI, BB, 4, 0);
11657  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11658  BB = EmitAtomicBinary(MI, BB, 8, 0);
11659  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11660  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11661  (Subtarget.hasPartwordAtomics() &&
11662  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11663  (Subtarget.hasPartwordAtomics() &&
11664  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11665  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11666 
11667  auto LoadMnemonic = PPC::LDARX;
11668  auto StoreMnemonic = PPC::STDCX;
11669  switch (MI.getOpcode()) {
11670  default:
11671  llvm_unreachable("Compare and swap of unknown size");
11672  case PPC::ATOMIC_CMP_SWAP_I8:
11673  LoadMnemonic = PPC::LBARX;
11674  StoreMnemonic = PPC::STBCX;
11675  assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
11676  break;
11677  case PPC::ATOMIC_CMP_SWAP_I16:
11678  LoadMnemonic = PPC::LHARX;
11679  StoreMnemonic = PPC::STHCX;
11680  assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
11681  break;
11682  case PPC::ATOMIC_CMP_SWAP_I32:
11683  LoadMnemonic = PPC::LWARX;
11684  StoreMnemonic = PPC::STWCX;
11685  break;
11686  case PPC::ATOMIC_CMP_SWAP_I64:
11687  LoadMnemonic = PPC::LDARX;
11688  StoreMnemonic = PPC::STDCX;
11689  break;
11690  }
11691  Register dest = MI.getOperand(0).getReg();
11692  Register ptrA = MI.getOperand(1).getReg();
11693  Register ptrB = MI.getOperand(2).getReg();
11694  Register oldval = MI.getOperand(3).getReg();
11695  Register newval = MI.getOperand(4).getReg();
11696  DebugLoc dl = MI.getDebugLoc();
11697 
11698  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11699  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11700  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11701  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11702  F->insert(It, loop1MBB);
11703  F->insert(It, loop2MBB);
11704  F->insert(It, midMBB);
11705  F->insert(It, exitMBB);
11706  exitMBB->splice(exitMBB->begin(), BB,
11707  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11708  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11709 
11710  // thisMBB:
11711  // ...
11712  // fallthrough --> loopMBB
11713  BB->addSuccessor(loop1MBB);
11714 
11715  // loop1MBB:
11716  // l[bhwd]arx dest, ptr
11717  // cmp[wd] dest, oldval
11718  // bne- midMBB
11719  // loop2MBB:
11720  // st[bhwd]cx. newval, ptr
11721  // bne- loopMBB
11722  // b exitBB
11723  // midMBB:
11724  // st[bhwd]cx. dest, ptr
11725  // exitBB:
11726  BB = loop1MBB;
11727  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11728  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11729  .addReg(oldval)
11730  .addReg(dest);
11731  BuildMI(BB, dl, TII->get(PPC::BCC))
11732  .addImm(PPC::PRED_NE)
11733  .addReg(PPC::CR0)
11734  .addMBB(midMBB);
11735  BB->addSuccessor(loop2MBB);
11736  BB->addSuccessor(midMBB);
11737 
11738  BB = loop2MBB;
11739  BuildMI(BB, dl, TII->get(StoreMnemonic))
11740  .addReg(newval)
11741  .addReg(ptrA)
11742  .addReg(ptrB);
11743  BuildMI(BB, dl, TII->get(PPC::BCC))
11744  .addImm(PPC::PRED_NE)
11745  .addReg(PPC::CR0)
11746  .addMBB(loop1MBB);
11747  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11748  BB->addSuccessor(loop1MBB);
11749  BB->addSuccessor(exitMBB);
11750 
11751  BB = midMBB;
11752  BuildMI(BB, dl, TII->get(StoreMnemonic))
11753  .addReg(dest)
11754  .addReg(ptrA)
11755  .addReg(ptrB);
11756  BB->addSuccessor(exitMBB);
11757 
11758  // exitMBB:
11759  // ...
11760  BB = exitMBB;
11761  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11762  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11763  // We must use 64-bit registers for addresses when targeting 64-bit,
11764  // since we're actually doing arithmetic on them. Other registers
11765  // can be 32-bit.
11766  bool is64bit = Subtarget.isPPC64();
11767  bool isLittleEndian = Subtarget.isLittleEndian();
11768  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11769 
11770  Register dest = MI.getOperand(0).getReg();
11771  Register ptrA = MI.getOperand(1).getReg();
11772  Register ptrB = MI.getOperand(2).getReg();
11773  Register oldval = MI.getOperand(3).getReg();
11774  Register newval = MI.getOperand(4).getReg();
11775  DebugLoc dl = MI.getDebugLoc();
11776 
11777  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11778  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11779  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11780  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11781  F->insert(It, loop1MBB);
11782  F->insert(It, loop2MBB);
11783  F->insert(It, midMBB);
11784  F->insert(It, exitMBB);
11785  exitMBB->splice(exitMBB->begin(), BB,
11786  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11787  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11788 
11789  MachineRegisterInfo &RegInfo = F->getRegInfo();
11790  const TargetRegisterClass *RC =
11791  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11792  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11793 
11794  Register PtrReg = RegInfo.createVirtualRegister(RC);
11795  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11796  Register ShiftReg =
11797  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11798  Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11799  Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11800  Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11801  Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11802  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11803  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11804  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11805  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11806  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11807  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11808  Register Ptr1Reg;
11809  Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11810  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11811  // thisMBB:
11812  // ...
11813  // fallthrough --> loopMBB
11814  BB->addSuccessor(loop1MBB);
11815 
11816  // The 4-byte load must be aligned, while a char or short may be
11817  // anywhere in the word. Hence all this nasty bookkeeping code.
11818  // add ptr1, ptrA, ptrB [copy if ptrA==0]
11819  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11820  // xori shift, shift1, 24 [16]
11821  // rlwinm ptr, ptr1, 0, 0, 29
11822  // slw newval2, newval, shift
11823  // slw oldval2, oldval,shift
11824  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11825  // slw mask, mask2, shift
11826  // and newval3, newval2, mask
11827  // and oldval3, oldval2, mask
11828  // loop1MBB:
11829  // lwarx tmpDest, ptr
11830  // and tmp, tmpDest, mask
11831  // cmpw tmp, oldval3
11832  // bne- midMBB
11833  // loop2MBB:
11834  // andc tmp2, tmpDest, mask
11835  // or tmp4, tmp2, newval3
11836  // stwcx. tmp4, ptr
11837  // bne- loop1MBB
11838  // b exitBB
11839  // midMBB:
11840  // stwcx. tmpDest, ptr
11841  // exitBB:
11842  // srw dest, tmpDest, shift
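  // A worked example of the shift math, assuming an 8-bit operation on an
  // address with (addr & 3) == 1: rlwinm extracts (addr & 3) << 3 = 8, so on
  // little-endian the lane shift is 8 (byte 1 holds bits 8..15 of the word);
  // on big-endian the xori with 24 yields 8 ^ 24 = 16, since byte 1 counted
  // from the most-significant end occupies bits 16..23.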
11843  if (ptrA != ZeroReg) {
11844  Ptr1Reg = RegInfo.createVirtualRegister(RC);
11845  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11846  .addReg(ptrA)
11847  .addReg(ptrB);
11848  } else {
11849  Ptr1Reg = ptrB;
11850  }
11851 
11852  // We need to use a 32-bit subregister here to avoid a register-class
11853  // mismatch in 64-bit mode.
11854  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11855  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11856  .addImm(3)
11857  .addImm(27)
11858  .addImm(is8bit ? 28 : 27);
11859  if (!isLittleEndian)
11860  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11861  .addReg(Shift1Reg)
11862  .addImm(is8bit ? 24 : 16);
11863  if (is64bit)
11864  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11865  .addReg(Ptr1Reg)
11866  .addImm(0)
11867  .addImm(61);
11868  else
11869  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11870  .addReg(Ptr1Reg)
11871  .addImm(0)
11872  .addImm(0)
11873  .addImm(29);
11874  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11875  .addReg(newval)
11876  .addReg(ShiftReg);
11877  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11878  .addReg(oldval)
11879  .addReg(ShiftReg);
11880  if (is8bit)
11881  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11882  else {
11883  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11884  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11885  .addReg(Mask3Reg)
11886  .addImm(65535);
11887  }
11888  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11889  .addReg(Mask2Reg)
11890  .addReg(ShiftReg);
11891  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11892  .addReg(NewVal2Reg)
11893  .addReg(MaskReg);
11894  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11895  .addReg(OldVal2Reg)
11896  .addReg(MaskReg);
11897 
11898  BB = loop1MBB;
11899  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11900  .addReg(ZeroReg)
11901  .addReg(PtrReg);
11902  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11903  .addReg(TmpDestReg)
11904  .addReg(MaskReg);
11905  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11906  .addReg(TmpReg)
11907  .addReg(OldVal3Reg);
11908  BuildMI(BB, dl, TII->get(PPC::BCC))
11909  .addImm(PPC::PRED_NE)
11910  .addReg(PPC::CR0)
11911  .addMBB(midMBB);
11912  BB->addSuccessor(loop2MBB);
11913  BB->addSuccessor(midMBB);
11914 
11915  BB = loop2MBB;
11916  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11917  .addReg(TmpDestReg)
11918  .addReg(MaskReg);
11919  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11920  .addReg(Tmp2Reg)
11921  .addReg(NewVal3Reg);
11922  BuildMI(BB, dl, TII->get(PPC::STWCX))
11923  .addReg(Tmp4Reg)
11924  .addReg(ZeroReg)
11925  .addReg(PtrReg);
11926  BuildMI(BB, dl, TII->get(PPC::BCC))
11927  .addImm(PPC::PRED_NE)
11928  .addReg(PPC::CR0)
11929  .addMBB(loop1MBB);
11930  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11931  BB->addSuccessor(loop1MBB);
11932  BB->addSuccessor(exitMBB);
11933 
11934  BB = midMBB;
11935  BuildMI(BB, dl, TII->get(PPC::STWCX))
11936  .addReg(TmpDestReg)
11937  .addReg(ZeroReg)
11938  .addReg(PtrReg);
11939  BB->addSuccessor(exitMBB);
11940 
11941  // exitMBB:
11942  // ...
11943  BB = exitMBB;
11944  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11945  .addReg(TmpReg)
11946  .addReg(ShiftReg);
11947  } else if (MI.getOpcode() == PPC::FADDrtz) {
11948  // This pseudo performs an FADD with rounding mode temporarily forced
11949  // to round-to-zero. We emit this via custom inserter since the FPSCR
11950  // is not modeled at the SelectionDAG level.
11951  Register Dest = MI.getOperand(0).getReg();
11952  Register Src1 = MI.getOperand(1).getReg();
11953  Register Src2 = MI.getOperand(2).getReg();
11954  DebugLoc dl = MI.getDebugLoc();
11955 
11956  MachineRegisterInfo &RegInfo = F->getRegInfo();
11957  Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11958 
11959  // Save FPSCR value.
11960  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11961 
11962  // Set rounding mode to round-to-zero.
11963  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
11964  .addImm(31)
11965  .addReg(PPC::RM, RegState::ImplicitDefine);
11966 
11967  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
11968  .addImm(30)
11969  .addReg(PPC::RM, RegState::ImplicitDefine);
11970 
11971  // Perform addition.
11972  auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
11973  .addReg(Src1)
11974  .addReg(Src2);
11975  if (MI.getFlag(MachineInstr::NoFPExcept))
11976  MIB.setFlag(MachineInstr::NoFPExcept);
11977 
11978  // Restore FPSCR value.
11979  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11980  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11981  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
11982  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11983  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
11984  unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11985  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
11986  ? PPC::ANDI8_rec
11987  : PPC::ANDI_rec;
11988  bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11989  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
11990 
11991  MachineRegisterInfo &RegInfo = F->getRegInfo();
11992  Register Dest = RegInfo.createVirtualRegister(
11993  Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11994 
11995  DebugLoc Dl = MI.getDebugLoc();
11996  BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
11997  .addReg(MI.getOperand(1).getReg())
11998  .addImm(1);
11999  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12000  MI.getOperand(0).getReg())
12001  .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12002  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12003  DebugLoc Dl = MI.getDebugLoc();
12004  MachineRegisterInfo &RegInfo = F->getRegInfo();
12005  Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12006  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12007  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12008  MI.getOperand(0).getReg())
12009  .addReg(CRReg);
12010  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12011  DebugLoc Dl = MI.getDebugLoc();
12012  unsigned Imm = MI.getOperand(1).getImm();
12013  BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12014  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12015  MI.getOperand(0).getReg())
12016  .addReg(PPC::CR0EQ);
12017  } else if (MI.getOpcode() == PPC::SETRNDi) {
12018  DebugLoc dl = MI.getDebugLoc();
12019  Register OldFPSCRReg = MI.getOperand(0).getReg();
12020 
12021  // Save FPSCR value.
12022  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12023 
12024  // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
12025  // the following settings:
12026  // 00 Round to nearest
12027  // 01 Round to 0
12028  // 10 Round to +inf
12029  // 11 Round to -inf
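  // For example, Mode = 2 (round to +inf) emits mtfsb0 31 to clear bit 63
  // and mtfsb1 30 to set bit 62; the MTFSB operands use the 32-bit bit
  // numbering, in which 31 and 30 name FPSCR bits 63 and 62.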
12030 
12031  // When the operand is an immediate, use its two least significant bits to
12032  // set bits 62:63 of the FPSCR.
12033  unsigned Mode = MI.getOperand(1).getImm();
12034  BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12035  .addImm(31)
12036  .addReg(PPC::RM, RegState::ImplicitDefine);
12037 
12038  BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12039  .addImm(30)
12040  .addReg(PPC::RM, RegState::ImplicitDefine);
12041  } else if (MI.getOpcode() == PPC::SETRND) {
12042  DebugLoc dl = MI.getDebugLoc();
12043 
12044  // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12045  // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12046  // If the target doesn't have DirectMove, we go through the stack to do the
12047  // conversion, because the target lacks instructions such as mtvsrd or
12048  // mfvsrd to do it directly.
12049  auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12050  if (Subtarget.hasDirectMove()) {
12051  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12052  .addReg(SrcReg);
12053  } else {
12054  // Use stack to do the register copy.
12055  unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12056  MachineRegisterInfo &RegInfo = F->getRegInfo();
12057  const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12058  if (RC == &PPC::F8RCRegClass) {
12059  // Copy register from F8RCRegClass to G8RCRegClass.
12060  assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12061  "Unsupported RegClass.");
12062 
12063  StoreOp = PPC::STFD;
12064  LoadOp = PPC::LD;
12065  } else {
12066  // Copy register from G8RCRegClass to F8RCRegClass.
12067  assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12068  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12069  "Unsupported RegClass.");
12070  }
12071 
12072  MachineFrameInfo &MFI = F->getFrameInfo();
12073  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12074 
12075  MachineMemOperand *MMOStore = F->getMachineMemOperand(
12076  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12077  MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12078  MFI.getObjectAlign(FrameIdx));
12079 
12080  // Store the SrcReg into the stack.
12081  BuildMI(*BB, MI, dl, TII->get(StoreOp))
12082  .addReg(SrcReg)
12083  .addImm(0)
12084  .addFrameIndex(FrameIdx)
12085  .addMemOperand(MMOStore);
12086 
12087  MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12088  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12089  MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12090  MFI.getObjectAlign(FrameIdx));
12091 
12092  // Load from the stack where SrcReg is stored, and save to DestReg,
12093  // so we have done the RegClass conversion from RegClass::SrcReg to
12094  // RegClass::DestReg.
12095  BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12096  .addImm(0)
12097  .addFrameIndex(FrameIdx)
12098  .addMemOperand(MMOLoad);
12099  }
12100  };
12101 
12102  Register OldFPSCRReg = MI.getOperand(0).getReg();
12103 
12104  // Save FPSCR value.
12105  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12106 
12107  // When the operand is a GPR, use its two least significant bits together
12108  // with the mtfsf instruction to set bits 62:63 of the FPSCR.
12109  //
12110  // copy OldFPSCRTmpReg, OldFPSCRReg
12111  // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12112  // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12113  // copy NewFPSCRReg, NewFPSCRTmpReg
12114  // mtfsf 255, NewFPSCRReg
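  // The net effect of the rldimi below is
  //   NewFPSCRTmpReg = (ExtSrcReg & 3) | (OldFPSCRTmpReg & ~3),
  // i.e. only the two rounding-mode bits 62:63 change and every other FPSCR
  // bit is preserved.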
12115  MachineOperand SrcOp = MI.getOperand(1);
12116  MachineRegisterInfo &RegInfo = F->getRegInfo();
12117  Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12118 
12119  copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12120 
12121  Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12122  Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12123 
12124  // The first operand of INSERT_SUBREG should be a register that has
12125  // subregisters; since we only care about its register class, an
12126  // IMPLICIT_DEF register suffices.
12127  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12128  BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12129  .addReg(ImDefReg)
12130  .add(SrcOp)
12131  .addImm(1);
12132 
12133  Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12134  BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12135  .addReg(OldFPSCRTmpReg)
12136  .addReg(ExtSrcReg)
12137  .addImm(0)
12138  .addImm(62);
12139 
12140  Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12141  copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12142 
12143  // The mask 255 means: put bits 32:63 of NewFPSCRReg into bits 32:63
12144  // of the FPSCR.
12145  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12146  .addImm(255)
12147  .addReg(NewFPSCRReg)
12148  .addImm(0)
12149  .addImm(0);
12150  } else if (MI.getOpcode() == PPC::SETFLM) {
12151  DebugLoc Dl = MI.getDebugLoc();
12152 
12153  // Result of setflm is previous FPSCR content, so we need to save it first.
12154  Register OldFPSCRReg = MI.getOperand(0).getReg();
12155  BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12156 
12157  // Put bits 32:63 into the FPSCR.
12158  Register NewFPSCRReg = MI.getOperand(1).getReg();
12159  BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12160  .addImm(255)
12161  .addReg(NewFPSCRReg)
12162  .addImm(0)
12163  .addImm(0);
12164  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12165  MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12166  return emitProbedAlloca(MI, BB);
12167  } else {
12168  llvm_unreachable("Unexpected instr type to insert");
12169  }
12170 
12171  MI.eraseFromParent(); // The pseudo instruction is gone now.
12172  return BB;
12173 }
12174 
12175 //===----------------------------------------------------------------------===//
12176 // Target Optimization Hooks
12177 //===----------------------------------------------------------------------===//
12178 
12179 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12180  // For the estimates, convergence is quadratic, so we essentially double the
12181  // number of digits correct after every iteration. For both FRE and FRSQRTE,
12182  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12183  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
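  // For example, starting from 2^-5: one step gives ~2^-10, two ~2^-20, and
  // three ~2^-40, the first to exceed float's 23 digits; double's 52 digits
  // need the extra step added below (~2^-80). With 2^-14 estimates, one step
  // (~2^-28) suffices for float and two (~2^-56) for double.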
12184  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12185  if (VT.getScalarType() == MVT::f64)
12186  RefinementSteps++;
12187  return RefinementSteps;
12188 }
12189 
12190 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12191  const DenormalMode &Mode) const {
12192  // We only have VSX Vector Test for software Square Root.
12193  EVT VT = Op.getValueType();
12194  if (!isTypeLegal(MVT::i1) ||
12195  (VT != MVT::f64 &&
12196  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12197  return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12198 
12199  SDLoc DL(Op);
12200  // The output register of FTSQRT is a CR field.
12201  SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12202  // ftsqrt BF,FRB
12203  // Let e_b be the unbiased exponent of the double-precision
12204  // floating-point operand in register FRB.
12205  // fe_flag is set to 1 if either of the following conditions occurs.
12206  // - The double-precision floating-point operand in register FRB is a zero,
12207  // a NaN, or an infinity, or a negative value.
12208  // - e_b is less than or equal to -970.
12209  // Otherwise fe_flag is set to 0.
12210  // Both VSX and non-VSX versions set the EQ bit in the CR if the number is
12211  // not eligible for iteration (zero/negative/infinity/NaN, or the unbiased
12212  // exponent is less than -970).
12213  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12214  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12215  FTSQRT, SRIdxVal),
12216  0);
12217 }
12218 
12219 SDValue
12220 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12221  SelectionDAG &DAG) const {
12222  // We only have VSX Vector Square Root.
12223  EVT VT = Op.getValueType();
12224  if (VT != MVT::f64 &&
12225  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12226  return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12227 
12228  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12229 }
12230 
12231 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12232  int Enabled, int &RefinementSteps,
12233  bool &UseOneConstNR,
12234  bool Reciprocal) const {
12235  EVT VT = Operand.getValueType();
12236  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12237  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12238  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12239  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12240  if (RefinementSteps == ReciprocalEstimate::Unspecified)
12241  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12242 
12243  // The Newton-Raphson computation with a single constant does not provide
12244  // enough accuracy on some CPUs.
12245  UseOneConstNR = !Subtarget.needsTwoConstNR();
12246  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12247  }
12248  return SDValue();
12249 }
12250 
12251 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12252  int Enabled,
12253  int &RefinementSteps) const {
12254  EVT VT = Operand.getValueType();
12255  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12256  (VT == MVT::f64 && Subtarget.hasFRE()) ||
12257  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12258  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12259  if (RefinementSteps == ReciprocalEstimate::Unspecified)
12260  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12261  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12262  }
12263  return SDValue();
12264 }
12265 
12266 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12267  // Note: This functionality is used only when unsafe-fp-math is enabled, and
12268  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12269  // enabled for division), this functionality is redundant with the default
12270  // combiner logic (once the division -> reciprocal/multiply transformation
12271  // has taken place). As a result, this matters more for older cores than for
12272  // newer ones.
12273 
12274  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12275  // reciprocal if there are two or more FDIVs (for embedded cores with only
12276  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
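  // In other words, once the number of FDIVs sharing a divisor d reaches the
  // threshold returned here, the combiner rewrites a/d + b/d as
  //   r = 1.0/d; a*r + b*r.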
12277  switch (Subtarget.getCPUDirective()) {
12278  default:
12279  return 3;
12280  case PPC::DIR_440:
12281  case PPC::DIR_A2:
12282  case PPC::DIR_E500:
12283  case PPC::DIR_E500mc:
12284  case PPC::DIR_E5500:
12285  return 2;
12286  }
12287 }
12288 
12289 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12290 // collapsed, and so we need to look through chains of them.
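// For example, Loc = (add (add X, 8), 16) accumulates Base = X and
// Offset = 24 by recursing through both adds.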
12291 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12292  int64_t& Offset, SelectionDAG &DAG) {
12293  if (DAG.isBaseWithConstantOffset(Loc)) {
12294  Base = Loc.getOperand(0);
12295  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12296 
12297  // The base might itself be a base plus an offset, and if so, accumulate
12298  // that as well.
12299  getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12300  }
12301 }
12302 
12303 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12304  unsigned Bytes, int Dist,
12305  SelectionDAG &DAG) {
12306  if (VT.getSizeInBits() / 8 != Bytes)
12307  return false;
12308 
12309  SDValue BaseLoc = Base->getBasePtr();
12310  if (Loc.getOpcode() == ISD::FrameIndex) {
12311  if (BaseLoc.getOpcode() != ISD::FrameIndex)
12312  return false;
12313  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12314  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
12315  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12316  int FS = MFI.getObjectSize(FI);
12317  int BFS = MFI.getObjectSize(BFI);
12318  if (FS != BFS || FS != (int)Bytes) return false;
12319  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12320  }
12321 
12322  SDValue Base1 = Loc, Base2 = BaseLoc;
12323  int64_t Offset1 = 0, Offset2 = 0;
12324  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12325  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12326  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12327  return true;
12328 
12329  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12330  const GlobalValue *GV1 = nullptr;
12331  const GlobalValue *GV2 = nullptr;
12332  Offset1 = 0;
12333  Offset2 = 0;
12334  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12335  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12336  if (isGA1 && isGA2 && GV1 == GV2)
12337  return Offset1 == (Offset2 + Dist*Bytes);
12338  return false;
12339 }
12340 
12341 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12342 // not enforce equality of the chain operands.
12343 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12344  unsigned Bytes, int Dist,
12345  SelectionDAG &DAG) {
12346  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12347  EVT VT = LS->getMemoryVT();
12348  SDValue Loc = LS->getBasePtr();
12349  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12350  }
12351 
12352  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12353  EVT VT;
12354  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12355  default: return false;
12356  case Intrinsic::ppc_altivec_lvx:
12357  case Intrinsic::ppc_altivec_lvxl:
12358  case Intrinsic::ppc_vsx_lxvw4x:
12359  case Intrinsic::ppc_vsx_lxvw4x_be:
12360  VT = MVT::v4i32;
12361  break;
12362  case Intrinsic::ppc_vsx_lxvd2x:
12363  case Intrinsic::ppc_vsx_lxvd2x_be:
12364  VT = MVT::v2f64;
12365  break;
12366  case Intrinsic::ppc_altivec_lvebx:
12367  VT = MVT::i8;
12368  break;
12369  case Intrinsic::ppc_altivec_lvehx:
12370  VT = MVT::i16;
12371  break;
12372  case Intrinsic::ppc_altivec_lvewx:
12373  VT = MVT::i32;
12374  break;
12375  }
12376 
12377  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12378  }
12379 
12380  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12381  EVT VT;
12382  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12383  default: return false;
12384  case Intrinsic::ppc_altivec_stvx:
12385  case Intrinsic::ppc_altivec_stvxl:
12386  case Intrinsic::ppc_vsx_stxvw4x:
12387  VT = MVT::v4i32;
12388  break;
12389  case Intrinsic::ppc_vsx_stxvd2x:
12390  VT = MVT::v2f64;
12391  break;
12392  case Intrinsic::ppc_vsx_stxvw4x_be:
12393  VT = MVT::v4i32;
12394  break;
12395  case Intrinsic::ppc_vsx_stxvd2x_be:
12396  VT = MVT::v2f64;
12397  break;
12398  case Intrinsic::ppc_altivec_stvebx:
12399  VT = MVT::i8;
12400  break;
12401  case Intrinsic::ppc_altivec_stvehx:
12402  VT = MVT::i16;
12403  break;
12404  case Intrinsic::ppc_altivec_stvewx:
12405  VT = MVT::i32;
12406  break;
12407  }
12408 
12409  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12410  }
12411 
12412  return false;
12413 }
12414 
12415 // Return true if there is a nearby consecutive load to the one provided
12416 // (regardless of alignment). We search up and down the chain, looking
12417 // through token factors and other loads (but nothing else). As a result, a
12418 // true result indicates that it is safe to create a new consecutive load
12419 // adjacent to the load provided.
12420 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12421  SDValue Chain = LD->getChain();
12422  EVT VT = LD->getMemoryVT();
12423 
12424  SmallSet<SDNode *, 16> LoadRoots;
12425  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12426  SmallSet<SDNode *, 16> Visited;
12427 
12428  // First, search up the chain, branching to follow all token-factor operands.
12429  // If we find a consecutive load, then we're done, otherwise, record all
12430  // nodes just above the top-level loads and token factors.
12431  while (!Queue.empty()) {
12432  SDNode *ChainNext = Queue.pop_back_val();
12433  if (!Visited.insert(ChainNext).second)
12434  continue;
12435 
12436  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12437  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12438  return true;
12439 
12440  if (!Visited.count(ChainLD->getChain().getNode()))
12441  Queue.push_back(ChainLD->getChain().getNode());
12442  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12443  for (const SDUse &O : ChainNext->ops())
12444  if (!Visited.count(O.getNode()))
12445  Queue.push_back(O.getNode());
12446  } else
12447  LoadRoots.insert(ChainNext);
12448  }
12449 
12450  // Second, search down the chain, starting from the top-level nodes recorded
12451  // in the first phase. These top-level nodes are the nodes just above all
12452  // loads and token factors. Starting with their uses, recursively look though
12453  // all loads (just the chain uses) and token factors to find a consecutive
12454  // load.
12455  Visited.clear();
12456  Queue.clear();
12457 
12458  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12459  IE = LoadRoots.end(); I != IE; ++I) {
12460  Queue.push_back(*I);
12461 
12462  while (!Queue.empty()) {
12463  SDNode *LoadRoot = Queue.pop_back_val();
12464  if (!Visited.insert(LoadRoot).second)
12465  continue;
12466 
12467  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12468  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12469  return true;
12470 
12471  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12472  UE = LoadRoot->use_end(); UI != UE; ++UI)
12473  if (((isa<MemSDNode>(*UI) &&
12474  cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12475  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12476  Queue.push_back(*UI);
12477  }
12478  }
12479 
12480  return false;
12481 }
12482 
12483 /// This function is called when we have proved that a SETCC node can be replaced
12484 /// by subtraction (and other supporting instructions) so that the result of
12485 /// comparison is kept in a GPR instead of CR. This function is purely for
12486 /// codegen purposes and has some flags to guide the codegen process.
12487 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12488  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12489  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12490 
12491  // Zero extend the operands to the largest legal integer. Originally, they
12492  // must be of a strictly smaller size.
12493  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12494  DAG.getConstant(Size, DL, MVT::i32));
12495  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12496  DAG.getConstant(Size, DL, MVT::i32));
12497 
12498  // Swap if needed. Depends on the condition code.
12499  if (Swap)
12500  std::swap(Op0, Op1);
12501 
12502  // Subtract extended integers.
12503  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12504 
12505  // Move the sign bit to the least significant position and zero out the rest.
12506  // Now the least significant bit carries the result of original comparison.
12507  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12508  DAG.getConstant(Size - 1, DL, MVT::i32));
12509  auto Final = Shifted;
12510 
12511  // Complement the result if needed. Based on the condition code.
12512  if (Complement)
12513  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12514  DAG.getConstant(1, DL, MVT::i64));
12515 
12516  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12517 }
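// A worked example of the sequence above (a sketch, assuming i32 operands
// and Size == 64, the largest legal integer size): for a = 1, b = 2 with
// ISD::SETULT,
//   sub     = zext64(a) - zext64(b) = 0xFFFFFFFFFFFFFFFF
//   shifted = sub >> (Size - 1)     = 1
// so the low bit carries (a <u b) in a GPR. SETULE, SETUGT and SETUGE reuse
// the same sequence with the Swap/Complement flags toggled.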
12518 
12519 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12520  DAGCombinerInfo &DCI) const {
12521  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12522 
12523  SelectionDAG &DAG = DCI.DAG;
12524  SDLoc DL(N);
12525 
12526  // Size of integers being compared has a critical role in the following
12527  // analysis, so we prefer to do this when all types are legal.
12528  if (!DCI.isAfterLegalizeDAG())
12529  return SDValue();
12530 
12531  // If all users of SETCC extend its value to a legal integer type
12532  // then we replace SETCC with a subtraction
12533  for (SDNode::use_iterator UI = N->use_begin(),
12534  UE = N->use_end(); UI != UE; ++UI) {
12535  if (UI->getOpcode() != ISD::ZERO_EXTEND)
12536  return SDValue();
12537  }
12538 
12539  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12540  auto OpSize = N->getOperand(0).getValueSizeInBits();
12541 
12542  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12543 
12544  if (OpSize < Size) {
12545  switch (CC) {
12546  default: break;
12547  case ISD::SETULT:
12548  return generateEquivalentSub(N, Size, false, false, DL, DAG);
12549  case ISD::SETULE:
12550  return generateEquivalentSub(N, Size, true, true, DL, DAG);
12551  case ISD::SETUGT:
12552  return generateEquivalentSub(N, Size, false, true, DL, DAG);
12553  case ISD::SETUGE:
12554  return generateEquivalentSub(N, Size, true, false, DL, DAG);
12555  }
12556  }
12557 
12558  return SDValue();
12559 }
12560 
12561 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
12562  DAGCombinerInfo &DCI) const {
12563  SelectionDAG &DAG = DCI.DAG;
12564  SDLoc dl(N);
12565 
12566  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
12567  // If we're tracking CR bits, we need to be careful that we don't have:
12568  // trunc(binary-ops(zext(x), zext(y)))
12569  // or
12570  // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
12571  // such that we're unnecessarily moving things into GPRs when it would be
12572  // better to keep them in CR bits.
12573 
12574  // Note that trunc here can be an actual i1 trunc, or can be the effective
12575  // truncation that comes from a setcc or select_cc.
12576  if (N->getOpcode() == ISD::TRUNCATE &&
12577  N->getValueType(0) != MVT::i1)
12578  return SDValue();
12579 
12580  if (N->getOperand(0).getValueType() != MVT::i32 &&
12581  N->getOperand(0).getValueType() != MVT::i64)
12582  return SDValue();
12583 
12584  if (N->getOpcode() == ISD::SETCC ||
12585  N->getOpcode() == ISD::SELECT_CC) {
12586  // If we're looking at a comparison, then we need to make sure that the
12587  // high bits (all except for the first) don't affect the result.
12588  ISD::CondCode CC =
12589  cast<CondCodeSDNode>(N->getOperand(
12590  N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
12591  unsigned OpBits = N->getOperand(0).getValueSizeInBits();
12592 
12593  if (ISD::isSignedIntSetCC(CC)) {
12594  if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
12595  DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
12596  return SDValue();
12597  } else if (ISD::isUnsignedIntSetCC(CC)) {
12598  if (!DAG.MaskedValueIsZero(N->getOperand(0),
12599  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
12600  !DAG.MaskedValueIsZero(N->getOperand(1),
12601  APInt::getHighBitsSet(OpBits, OpBits-1)))
12602  return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
12603  : SDValue());
12604  } else {
12605  // This is neither a signed nor an unsigned comparison, just make sure
12606  // that the high bits are equal.
12607  KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
12608  KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
12609 
12610  // We don't really care about what is known about the first bit (if
12611  // anything), so pretend that it is known zero for both to ensure they can
12612  // be compared as constants.
12613  Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
12614  Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
12615 
12616  if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
12617  Op1Known.getConstant() != Op2Known.getConstant())
12618  return SDValue();
12619  }
12620  }
12621 
12622  // We now know that the higher-order bits are irrelevant; we just need to
12623  // make sure that all of the intermediate operations are bit operations, and
12624  // all inputs are extensions.
12625  if (N->getOperand(0).getOpcode() != ISD::AND &&
12626  N->getOperand(0).getOpcode() != ISD::OR &&
12627  N->getOperand(0).getOpcode() != ISD::XOR &&
12628  N->getOperand(0).getOpcode() != ISD::SELECT &&
12629  N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
12630  N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
12631  N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
12632  N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
12633  N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
12634  return SDValue();
12635 
12636  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
12637  N->getOperand(1).getOpcode() != ISD::AND &&
12638  N->getOperand(1).getOpcode() != ISD::OR &&
12639  N->getOperand(1).getOpcode() != ISD::XOR &&
12640  N->getOperand(1).getOpcode() != ISD::SELECT &&
12641  N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
12642  N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
12643  N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
12644  N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
12645  N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
12646  return SDValue();
12647 
12648  SmallVector<SDValue, 4> Inputs;
12649  SmallVector<SDValue, 8> BinOps, PromOps;
12650  SmallPtrSet<SDNode *, 16> Visited;
12651 
12652  for (unsigned i = 0; i < 2; ++i) {
12653  if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12654  N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12655  N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12656  N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12657  isa<ConstantSDNode>(N->getOperand(i)))
12658  Inputs.push_back(N->getOperand(i));
12659  else
12660  BinOps.push_back(N->getOperand(i));
12661 
12662  if (N->getOpcode() == ISD::TRUNCATE)
12663  break;
12664  }
12665 
12666  // Visit all inputs, collect all binary operations (and, or, xor and
12667  // select) that are all fed by extensions.
12668  while (!BinOps.empty()) {
12669  SDValue BinOp = BinOps.pop_back_val();
12670 
12671  if (!Visited.insert(BinOp.getNode()).second)
12672  continue;
12673 
12674  PromOps.push_back(BinOp);
12675 
12676  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12677  // The condition of the select is not promoted.
12678  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12679  continue;
12680  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12681  continue;
12682 
12683  if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12684  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12685  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12686  BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12687  isa<ConstantSDNode>(BinOp.getOperand(i))) {
12688  Inputs.push_back(BinOp.getOperand(i));
12689  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12690  BinOp.getOperand(i).getOpcode() == ISD::OR ||
12691  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12692  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12693  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12694  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12695  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12696  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12697  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12698  BinOps.push_back(BinOp.getOperand(i));
12699  } else {
12700  // We have an input that is not an extension or another binary
12701  // operation; we'll abort this transformation.
12702  return SDValue();
12703  }
12704  }
12705  }
12706 
12707  // Make sure that this is a self-contained cluster of operations (which
12708  // is not quite the same thing as saying that everything has only one
12709  // use).
12710  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12711  if (isa<ConstantSDNode>(Inputs[i]))
12712  continue;
12713 
12714  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12715  UE = Inputs[i].getNode()->use_end();
12716  UI != UE; ++UI) {
12717  SDNode *User = *UI;
12718  if (User != N && !Visited.count(User))
12719  return SDValue();
12720 
12721  // Make sure that we're not going to promote the non-output-value
12722  // operand(s) or SELECT or SELECT_CC.
12723  // FIXME: Although we could sometimes handle this, and it does occur in
12724  // practice that one of the condition inputs to the select is also one of
12725  // the outputs, we currently can't deal with this.
12726  if (User->getOpcode() == ISD::SELECT) {
12727  if (User->getOperand(0) == Inputs[i])
12728  return SDValue();
12729  } else if (User->getOpcode() == ISD::SELECT_CC) {
12730  if (User->getOperand(0) == Inputs[i] ||
12731  User->getOperand(1) == Inputs[i])
12732  return SDValue();
12733  }
12734  }
12735  }
12736 
12737  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12738  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12739  UE = PromOps[i].getNode()->use_end();
12740  UI != UE; ++UI) {
12741  SDNode *User = *UI;
12742  if (User != N && !Visited.count(User))
12743  return SDValue();
12744 
12745  // Make sure that we're not going to promote the non-output-value
12746  // operand(s) or SELECT or SELECT_CC.
12747  // FIXME: Although we could sometimes handle this, and it does occur in
12748  // practice that one of the condition inputs to the select is also one of
12749  // the outputs, we currently can't deal with this.
12750  if (User->getOpcode() == ISD::SELECT) {
12751  if (User->getOperand(0) == PromOps[i])
12752  return SDValue();
12753  } else if (User->getOpcode() == ISD::SELECT_CC) {
12754  if (User->getOperand(0) == PromOps[i] ||
12755  User->getOperand(1) == PromOps[i])
12756  return SDValue();
12757  }
12758  }
12759  }
12760 
12761  // Replace all inputs with the extension operand.
12762  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12763  // Constants may have users outside the cluster of to-be-promoted nodes,
12764  // and so we need to replace those as we do the promotions.
12765  if (isa<ConstantSDNode>(Inputs[i]))
12766  continue;
12767  else
12768  DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12769  }
12770 
12771  std::list<HandleSDNode> PromOpHandles;
12772  for (auto &PromOp : PromOps)
12773  PromOpHandles.emplace_back(PromOp);
12774 
12775  // Replace all operations (these are all the same, but have a different
12776  // (i1) return type). DAG.getNode will validate that the types of
12777  // a binary operator match, so go through the list in reverse so that
12778  // we've likely promoted both operands first. Any intermediate truncations or
12779  // extensions disappear.
12780  while (!PromOpHandles.empty()) {
12781  SDValue PromOp = PromOpHandles.back().getValue();
12782  PromOpHandles.pop_back();
12783 
12784  if (PromOp.getOpcode() == ISD::TRUNCATE ||
12785  PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12786  PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12787  PromOp.getOpcode() == ISD::ANY_EXTEND) {
12788  if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12789  PromOp.getOperand(0).getValueType() != MVT::i1) {
12790  // The operand is not yet ready (see comment below).
12791  PromOpHandles.emplace_front(PromOp);
12792  continue;
12793  }
12794 
12795  SDValue RepValue = PromOp.getOperand(0);
12796  if (isa<ConstantSDNode>(RepValue))
12797  RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12798 
12799  DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12800  continue;
12801  }
12802 
12803  unsigned C;
12804  switch (PromOp.getOpcode()) {
12805  default: C = 0; break;
12806  case ISD::SELECT: C = 1; break;
12807  case ISD::SELECT_CC: C = 2; break;
12808  }
12809 
12810  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12811  PromOp.getOperand(C).getValueType() != MVT::i1) ||
12812  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12813  PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12814  // The to-be-promoted operands of this node have not yet been
12815  // promoted (this should be rare because we're going through the
12816  // list backward, but if one of the operands has several users in
12817  // this cluster of to-be-promoted nodes, it is possible).
12818  PromOpHandles.emplace_front(PromOp);
12819  continue;
12820  }
12821 
12822  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12823  PromOp.getNode()->op_end());
12824 
12825  // If there are any constant inputs, make sure they're replaced now.
12826  for (unsigned i = 0; i < 2; ++i)
12827  if (isa<ConstantSDNode>(Ops[C+i]))
12828  Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12829 
12830  DAG.ReplaceAllUsesOfValueWith(PromOp,
12831  DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12832  }
12833 
12834  // Now we're left with the initial truncation itself.
12835  if (N->getOpcode() == ISD::TRUNCATE)
12836  return N->getOperand(0);
12837 
12838  // Otherwise, this is a comparison. The operands to be compared have just
12839  // changed type (to i1), but everything else is the same.
12840  return SDValue(N, 0);
12841 }
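// A small illustration of the combine above (a sketch, not a verbatim DAG
// dump): with CR-bit tracking, a cluster such as
//   (trunc i1 (xor (zext i32 %a:i1), (zext i32 %b:i1)))
// collapses to (xor %a, %b) computed directly on the i1 values, so the
// boolean never round-trips through a GPR.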
12842 
12843 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12844  DAGCombinerInfo &DCI) const {
12845  SelectionDAG &DAG = DCI.DAG;
12846  SDLoc dl(N);
12847 
12848  // If we're tracking CR bits, we need to be careful that we don't have:
12849  // zext(binary-ops(trunc(x), trunc(y)))
12850  // or
12851  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12852  // such that we're unnecessarily moving things into CR bits that can more
12853  // efficiently stay in GPRs. Note that if we're not certain that the high
12854  // bits are set as required by the final extension, we still may need to do
12855  // some masking to get the proper behavior.
12856 
12857  // This same functionality is important on PPC64 when dealing with
12858  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12859  // the return values of functions. Because it is so similar, it is handled
12860  // here as well.
12861 
12862  if (N->getValueType(0) != MVT::i32 &&
12863  N->getValueType(0) != MVT::i64)
12864  return SDValue();
12865 
12866  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12867  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12868  return SDValue();
12869 
12870  if (N->getOperand(0).getOpcode() != ISD::AND &&
12871  N->getOperand(0).getOpcode() != ISD::OR &&
12872  N->getOperand(0).getOpcode() != ISD::XOR &&
12873  N->getOperand(0).getOpcode() != ISD::SELECT &&
12874  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12875  return SDValue();
12876 
12877  SmallVector<SDValue, 4> Inputs;
12878  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12879  SmallPtrSet<SDNode *, 16> Visited;
12880 
12881  // Visit all inputs, collect all binary operations (and, or, xor and
12882  // select) that are all fed by truncations.
12883  while (!BinOps.empty()) {
12884  SDValue BinOp = BinOps.pop_back_val();
12885 
12886  if (!Visited.insert(BinOp.getNode()).second)
12887  continue;
12888 
12889  PromOps.push_back(BinOp);
12890 
12891  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12892  // The condition of the select is not promoted.
12893  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12894  continue;
12895  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12896  continue;
12897 
12898  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12899  isa<ConstantSDNode>(BinOp.getOperand(i))) {
12900  Inputs.push_back(BinOp.getOperand(i));
12901  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12902  BinOp.getOperand(i).getOpcode() == ISD::OR ||
12903  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12904  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12905  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12906  BinOps.push_back(BinOp.getOperand(i));
12907  } else {
12908  // We have an input that is not a truncation or another binary
12909  // operation; we'll abort this transformation.
12910  return SDValue();
12911  }
12912  }
12913  }
12914 
12915  // The operands of a select that must be truncated when the select is
12916  // promoted because the operand is actually part of the to-be-promoted set.
12917  DenseMap<SDNode *, EVT> SelectTruncOp[2];
12918 
12919  // Make sure that this is a self-contained cluster of operations (which
12920  // is not quite the same thing as saying that everything has only one
12921  // use).
12922  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12923  if (isa<ConstantSDNode>(Inputs[i]))
12924  continue;
12925 
12926  for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12927  UE = Inputs[i].getNode()->use_end();
12928  UI != UE; ++UI) {
12929  SDNode *User = *UI;
12930  if (User != N && !Visited.count(User))
12931  return SDValue();
12932 
12933  // If we're going to promote the non-output-value operand(s) or SELECT or
12934  // SELECT_CC, record them for truncation.
12935  if (User->getOpcode() == ISD::SELECT) {
12936  if (User->getOperand(0) == Inputs[i])
12937  SelectTruncOp[0].insert(std::make_pair(User,
12938  User->getOperand(0).getValueType()));
12939  } else if (User->getOpcode() == ISD::SELECT_CC) {
12940  if (User->getOperand(0) == Inputs[i])
12941  SelectTruncOp[0].insert(std::make_pair(User,
12942  User->getOperand(0).getValueType()));
12943  if (User->getOperand(1) == Inputs[i])
12944  SelectTruncOp[1].insert(std::make_pair(User,
12945  User->getOperand(1).getValueType()));
12946  }
12947  }
12948  }
12949 
12950  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12951  for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12952  UE = PromOps[i].getNode()->use_end();
12953  UI != UE; ++UI) {
12954  SDNode *User = *UI;
12955  if (User != N && !Visited.count(User))
12956  return SDValue();
12957 
12958  // If we're going to promote the non-output-value operand(s) or SELECT or
12959  // SELECT_CC, record them for truncation.
12960  if (User->getOpcode() == ISD::SELECT) {
12961  if (User->getOperand(0) == PromOps[i])
12962  SelectTruncOp[0].insert(std::make_pair(User,
12963  User->getOperand(0).getValueType()));
12964  } else if (User->getOpcode() == ISD::SELECT_CC) {
12965  if (User->getOperand(0) == PromOps[i])
12966  SelectTruncOp[0].insert(std::make_pair(User,
12967  User->getOperand(0).getValueType()));
12968  if (User->getOperand(1) == PromOps[i])
12969  SelectTruncOp[1].insert(std::make_pair(User,
12970  User->getOperand(1).getValueType()));
12971  }
12972  }
12973  }
12974 
12975  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12976  bool ReallyNeedsExt = false;
12977  if (N->getOpcode() != ISD::ANY_EXTEND) {
12978  // If all of the inputs are not already sign/zero extended, then
12979  // we'll still need to do that at the end.
12980  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12981  if (isa<ConstantSDNode>(Inputs[i]))
12982  continue;
12983 
12984  unsigned OpBits =
12985  Inputs[i].getOperand(0).getValueSizeInBits();
12986  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12987 
12988  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12989  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12990  APInt::getHighBitsSet(OpBits,
12991  OpBits-PromBits))) ||
12992  (N->getOpcode() == ISD::SIGN_EXTEND &&
12993  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12994  (OpBits-(PromBits-1)))) {
12995  ReallyNeedsExt = true;
12996  break;
12997  }
12998  }
12999  }
13000 
13001  // Replace all inputs, either with the truncation operand, or a
13002  // truncation or extension to the final output type.
13003  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13004  // Constant inputs need to be replaced with the to-be-promoted nodes that
13005  // use them because they might have users outside of the cluster of
13006  // promoted nodes.
13007  if (isa<ConstantSDNode>(Inputs[i]))
13008  continue;
13009 
13010  SDValue InSrc = Inputs[i].getOperand(0);
13011  if (Inputs[i].getValueType() == N->getValueType(0))
13012  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13013  else if (N->getOpcode() == ISD::SIGN_EXTEND)
13014  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13015  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13016  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13017  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13018  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13019  else
13020  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13021  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13022  }
13023 
13024  std::list<HandleSDNode> PromOpHandles;
13025  for (auto &PromOp : PromOps)
13026  PromOpHandles.emplace_back(PromOp);
13027 
13028  // Replace all operations (these are all the same, but have a different
13029  // (promoted) return type). DAG.getNode will validate that the types of
13030  // a binary operator match, so go through the list in reverse so that
13031  // we've likely promoted both operands first.
13032  while (!PromOpHandles.empty()) {
13033  SDValue PromOp = PromOpHandles.back().getValue();
13034  PromOpHandles.pop_back();
13035 
13036  unsigned C;
13037  switch (PromOp.getOpcode()) {
13038  default: C = 0; break;
13039  case ISD::SELECT: C = 1; break;
13040  case ISD::SELECT_CC: C = 2; break;
13041  }
13042 
13043  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13044  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13045  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13046  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13047  // The to-be-promoted operands of this node have not yet been
13048  // promoted (this should be rare because we're going through the
13049  // list backward, but if one of the operands has several users in
13050  // this cluster of to-be-promoted nodes, it is possible).
13051  PromOpHandles.emplace_front(PromOp);
13052  continue;
13053  }
13054 
13055  // For SELECT and SELECT_CC nodes, we do a similar check for any
13056  // to-be-promoted comparison inputs.
13057  if (PromOp.getOpcode() == ISD::SELECT ||
13058  PromOp.getOpcode() == ISD::SELECT_CC) {
13059  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13060  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13061  (SelectTruncOp[1].count(PromOp.getNode()) &&
13062  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13063  PromOpHandles.emplace_front(PromOp);
13064  continue;
13065  }
13066  }
13067 
13068  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13069  PromOp.getNode()->op_end());
13070 
13071  // If this node has constant inputs, then they'll need to be promoted here.
13072  for (unsigned i = 0; i < 2; ++i) {
13073  if (!isa<ConstantSDNode>(Ops[C+i]))
13074  continue;
13075  if (Ops[C+i].getValueType() == N->getValueType(0))
13076  continue;
13077 
13078  if (N->getOpcode() == ISD::SIGN_EXTEND)
13079  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13080  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13081  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13082  else
13083  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13084  }
13085 
13086  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13087  // truncate them again to the original value type.
13088  if (PromOp.getOpcode() == ISD::SELECT ||
13089  PromOp.getOpcode() == ISD::SELECT_CC) {
13090  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13091  if (SI0 != SelectTruncOp[0].end())
13092  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13093  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13094  if (SI1 != SelectTruncOp[1].end())
13095  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13096  }
13097 
13098  DAG.ReplaceAllUsesOfValueWith(PromOp,
13099  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13100  }
13101 
13102  // Now we're left with the initial extension itself.
13103  if (!ReallyNeedsExt)
13104  return N->getOperand(0);
13105 
13106  // To zero extend, just mask off everything except for the first bit (in the
13107  // i1 case).
13108  if (N->getOpcode() == ISD::ZERO_EXTEND)
13109  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13110  DAG.getConstant(APInt::getLowBitsSet(
13111  N->getValueSizeInBits(0), PromBits),
13112  dl, N->getValueType(0)));
13113 
13114  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13115  "Invalid extension type");
13116  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13117  SDValue ShiftCst =
13118  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13119  return DAG.getNode(
13120  ISD::SRA, dl, N->getValueType(0),
13121  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13122  ShiftCst);
13123 }
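// The mirror-image illustration (again a sketch): on PPC64, a cluster like
//   (zext i64 (and (trunc i32 %x:i64), (trunc i32 %y:i64)))
// becomes (and %x, %y) performed directly in i64; the trailing AND mask (for
// zext) or shl/sra pair (for sext) is only emitted when ReallyNeedsExt is set.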
13124 
13125 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13126  DAGCombinerInfo &DCI) const {
13127  assert(N->getOpcode() == ISD::SETCC &&
13128  "Should be called with a SETCC node");
13129 
13130  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13131  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13132  SDValue LHS = N->getOperand(0);
13133  SDValue RHS = N->getOperand(1);
13134 
13135  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13136  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13137  LHS.hasOneUse())
13138  std::swap(LHS, RHS);
13139 
13140  // x == 0-y --> x+y == 0
13141  // x != 0-y --> x+y != 0
13142  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13143  RHS.hasOneUse()) {
13144  SDLoc DL(N);
13145  SelectionDAG &DAG = DCI.DAG;
13146  EVT VT = N->getValueType(0);
13147  EVT OpVT = LHS.getValueType();
13148  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13149  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13150  }
13151  }
13152 
13153  return DAGCombineTruncBoolExt(N, DCI);
13154 }
13155 
13156 // Is this an extending load from an f32 to an f64?
13157 static bool isFPExtLoad(SDValue Op) {
13158  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13159  return LD->getExtensionType() == ISD::EXTLOAD &&
13160  Op.getValueType() == MVT::f64;
13161  return false;
13162 }
13163 
13164 /// Reduces the number of fp-to-int conversion when building a vector.
13165 ///
13166 /// If this vector is built out of floating to integer conversions,
13167 /// transform it to a vector built out of floating point values followed by a
13168 /// single floating to integer conversion of the vector.
13169 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13170 /// becomes (fptosi (build_vector ($A, $B, ...)))
13171 SDValue PPCTargetLowering::
13172 combineElementTruncationToVectorTruncation(SDNode *N,
13173  DAGCombinerInfo &DCI) const {
13174  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13175  "Should be called with a BUILD_VECTOR node");
13176 
13177  SelectionDAG &DAG = DCI.DAG;
13178  SDLoc dl(N);
13179 
13180  SDValue FirstInput = N->getOperand(0);
13181  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13182  "The input operand must be an fp-to-int conversion.");
13183 
13184  // This combine happens after legalization so the fp_to_[su]i nodes are
13185  // already converted to PPCISD nodes.
13186  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13187  if (FirstConversion == PPCISD::FCTIDZ ||
13188  FirstConversion == PPCISD::FCTIDUZ ||
13189  FirstConversion == PPCISD::FCTIWZ ||
13190  FirstConversion == PPCISD::FCTIWUZ) {
13191  bool IsSplat = true;
13192  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13193  FirstConversion == PPCISD::FCTIWUZ;
13194  EVT SrcVT = FirstInput.getOperand(0).getValueType();
13195  SmallVector<SDValue, 4> Ops;
13196  EVT TargetVT = N->getValueType(0);
13197  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13198  SDValue NextOp = N->getOperand(i);
13199  if (NextOp.getOpcode() != PPCISD::MFVSR)
13200  return SDValue();
13201  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13202  if (NextConversion != FirstConversion)
13203  return SDValue();
13204  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13205  // This is not valid if the input was originally double precision. It is
13206  // also not profitable to do unless this is an extending load in which
13207  // case doing this combine will allow us to combine consecutive loads.
13208  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13209  return SDValue();
13210  if (N->getOperand(i) != FirstInput)
13211  IsSplat = false;
13212  }
13213 
13214  // If this is a splat, we leave it as-is since there will be only a single
13215  // fp-to-int conversion followed by a splat of the integer. This is better
13216  // for 32-bit and smaller ints and neutral for 64-bit ints.
13217  if (IsSplat)
13218  return SDValue();
13219 
13220  // Now that we know we have the right type of node, get its operands
13221  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13222  SDValue In = N->getOperand(i).getOperand(0);
13223  if (Is32Bit) {
13224  // For 32-bit values, we need to add an FP_ROUND node (if we made it
13225  // here, we know that all inputs are extending loads so this is safe).
13226  if (In.isUndef())
13227  Ops.push_back(DAG.getUNDEF(SrcVT));
13228  else {
13229  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13230  MVT::f32, In.getOperand(0),
13231  DAG.getIntPtrConstant(1, dl));
13232  Ops.push_back(Trunc);
13233  }
13234  } else
13235  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13236  }
13237 
13238  unsigned Opcode;
13239  if (FirstConversion == PPCISD::FCTIDZ ||
13240  FirstConversion == PPCISD::FCTIWZ)
13241  Opcode = ISD::FP_TO_SINT;
13242  else
13243  Opcode = ISD::FP_TO_UINT;
13244 
13245  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13246  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13247  return DAG.getNode(Opcode, dl, TargetVT, BV);
13248  }
13249  return SDValue();
13250 }
13251 
13252 /// Reduce the number of loads when building a vector.
13253 ///
13254 /// Building a vector out of multiple loads can be converted to a load
13255 /// of the vector type if the loads are consecutive. If the loads are
13256 /// consecutive but in descending order, a shuffle is added at the end
13257 /// to reorder the vector.
13258 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13259  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13260  "Should be called with a BUILD_VECTOR node");
13261 
13262  SDLoc dl(N);
13263 
13264  // Return early for non-byte-sized types, as they can't be consecutive.
13265  if (!N->getValueType(0).getVectorElementType().isByteSized())
13266  return SDValue();
13267 
13268  bool InputsAreConsecutiveLoads = true;
13269  bool InputsAreReverseConsecutive = true;
13270  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13271  SDValue FirstInput = N->getOperand(0);
13272  bool IsRoundOfExtLoad = false;
13273 
13274  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13275  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13276  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13277  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13278  }
13279  // Not a build vector of (possibly fp_rounded) loads.
13280  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13281  N->getNumOperands() == 1)
13282  return SDValue();
13283 
13284  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13285  // If any inputs are fp_round(extload), they all must be.
13286  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13287  return SDValue();
13288 
13289  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13290  N->getOperand(i);
13291  if (NextInput.getOpcode() != ISD::LOAD)
13292  return SDValue();
13293 
13294  SDValue PreviousInput =
13295  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13296  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13297  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13298 
13299  // If any inputs are fp_round(extload), they all must be.
13300  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13301  return SDValue();
13302 
13303  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13304  InputsAreConsecutiveLoads = false;
13305  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13306  InputsAreReverseConsecutive = false;
13307 
13308  // Exit early if the loads are neither consecutive nor reverse consecutive.
13309  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13310  return SDValue();
13311  }
13312 
13313  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13314  "The loads cannot be both consecutive and reverse consecutive.");
13315 
13316  SDValue FirstLoadOp =
13317  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13318  SDValue LastLoadOp =
13319  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13320  N->getOperand(N->getNumOperands()-1);
13321 
13322  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13323  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13324  if (InputsAreConsecutiveLoads) {
13325  assert(LD1 && "Input needs to be a LoadSDNode.");
13326  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13327  LD1->getBasePtr(), LD1->getPointerInfo(),
13328  LD1->getAlignment());
13329  }
13330  if (InputsAreReverseConsecutive) {
13331  assert(LDL && "Input needs to be a LoadSDNode.");
13332  SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13333  LDL->getBasePtr(), LDL->getPointerInfo(),
13334  LDL->getAlignment());
13335  SmallVector<int, 16> Ops;
13336  for (int i = N->getNumOperands() - 1; i >= 0; i--)
13337  Ops.push_back(i);
13338 
13339  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13340  DAG.getUNDEF(N->getValueType(0)), Ops);
13341  }
13342  return SDValue();
13343 }
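// For instance (a sketch): a v4i32 BUILD_VECTOR whose operands load from
// A+12, A+8, A+4 and A+0 is reverse consecutive, and becomes a single v4i32
// load from A followed by a vector_shuffle with mask <3,2,1,0>.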
13344 
13345 // This function adds the required vector_shuffle needed to get
13346 // the elements of the vector extract in the correct position
13347 // as specified by the CorrectElems encoding.
13348 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13349  SDValue Input, uint64_t Elems,
13350  uint64_t CorrectElems) {
13351  SDLoc dl(N);
13352 
13353  unsigned NumElems = Input.getValueType().getVectorNumElements();
13354  SmallVector<int, 16> ShuffleMask(NumElems, -1);
13355 
13356  // Knowing the element indices being extracted from the original
13357  // vector and the order in which they're being inserted, just put
13358  // them at element indices required for the instruction.
13359  for (unsigned i = 0; i < N->getNumOperands(); i++) {
13360  if (DAG.getDataLayout().isLittleEndian())
13361  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13362  else
13363  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13364  CorrectElems = CorrectElems >> 8;
13365  Elems = Elems >> 8;
13366  }
13367 
13368  SDValue Shuffle =
13369  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13370  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13371 
13372  EVT VT = N->getValueType(0);
13373  SDValue Conv = DAG.getBitcast(VT, Shuffle);
13374 
13375  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13376  Input.getValueType().getVectorElementType(),
13377  VT.getVectorNumElements());
13378  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13379  DAG.getValueType(ExtVT));
13380 }
13381 
13382 // Look for build vector patterns where input operands come from sign
13383 // extended vector_extract elements of specific indices. If the correct indices
13384 // aren't used, add a vector shuffle to fix up the indices and create
13385 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13386 // during instruction selection.
13387 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13388  // This array encodes the indices that the vector sign extend instructions
13389  // extract from when extending from one type to another for both BE and LE.
13390  // The right nibble of each byte corresponds to the LE indices,
13391  // and the left nibble of each byte corresponds to the BE indices.
13392  // For example: 0x3074B8FC byte->word
13393  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13394  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13395  // For example: 0x000070F8 byte->double word
13396  // For LE: the allowed indices are: 0x0,0x8
13397  // For BE: the allowed indices are: 0x7,0xF
13398  uint64_t TargetElems[] = {
13399  0x3074B8FC, // b->w
13400  0x000070F8, // b->d
13401  0x10325476, // h->w
13402  0x00003074, // h->d
13403  0x00001032, // w->d
13404  };
13405 
13406  uint64_t Elems = 0;
13407  int Index;
13408  SDValue Input;
13409 
13410  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13411  if (!Op)
13412  return false;
13413  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13414  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13415  return false;
13416 
13417  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13418  // of the right width.
13419  SDValue Extract = Op.getOperand(0);
13420  if (Extract.getOpcode() == ISD::ANY_EXTEND)
13421  Extract = Extract.getOperand(0);
13422  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13423  return false;
13424 
13425  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13426  if (!ExtOp)
13427  return false;
13428 
13429  Index = ExtOp->getZExtValue();
13430  if (Input && Input != Extract.getOperand(0))
13431  return false;
13432 
13433  if (!Input)
13434  Input = Extract.getOperand(0);
13435 
13436  Elems = Elems << 8;
13437  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13438  Elems |= Index;
13439 
13440  return true;
13441  };
13442 
13443  // If the build vector operands aren't sign-extended vector extracts
13444  // of the same input vector, then return.
13445  for (unsigned i = 0; i < N->getNumOperands(); i++) {
13446  if (!isSExtOfVecExtract(N->getOperand(i))) {
13447  return SDValue();
13448  }
13449  }
13450 
13451  // If the vector extract indices are not correct, add the appropriate
13452  // vector_shuffle.
13453  int TgtElemArrayIdx;
13454  int InputSize = Input.getValueType().getScalarSizeInBits();
13455  int OutputSize = N->getValueType(0).getScalarSizeInBits();
13456  if (InputSize + OutputSize == 40)
13457  TgtElemArrayIdx = 0;
13458  else if (InputSize + OutputSize == 72)
13459  TgtElemArrayIdx = 1;
13460  else if (InputSize + OutputSize == 48)
13461  TgtElemArrayIdx = 2;
13462  else if (InputSize + OutputSize == 80)
13463  TgtElemArrayIdx = 3;
13464  else if (InputSize + OutputSize == 96)
13465  TgtElemArrayIdx = 4;
13466  else
13467  return SDValue();
13468 
13469  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13470  CorrectElems = DAG.getDataLayout().isLittleEndian()
13471  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13472  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13473  if (Elems != CorrectElems) {
13474  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13475  }
13476 
13477  // Regular lowering will catch cases where a shuffle is not needed.
13478  return SDValue();
13479 }
13480 
13481 // Look for the pattern of a load from a narrow width to i128, feeding
13482 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13483 // (LXVRZX). This node represents a zero extending load that will be matched
13484 // to the Load VSX Vector Rightmost instructions.
13485 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13486  SDLoc DL(N);
13487 
13488  // This combine is only eligible for a BUILD_VECTOR of v1i128.
13489  if (N->getValueType(0) != MVT::v1i128)
13490  return SDValue();
13491 
13492  SDValue Operand = N->getOperand(0);
13493  // Proceed with the transformation if the operand to the BUILD_VECTOR
13494  // is a load instruction.
13495  if (Operand.getOpcode() != ISD::LOAD)
13496  return SDValue();
13497 
13498  LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13499  EVT MemoryType = LD->getMemoryVT();
13500 
13501  // This transformation is only valid if we are loading either a byte,
13502  // halfword, word, or doubleword.
13503  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13504  MemoryType == MVT::i32 || MemoryType == MVT::i64;
13505 
13506  // Ensure that the load from the narrow width is being zero extended to i128.
13507  if (!ValidLDType ||
13508  (LD->getExtensionType() != ISD::ZEXTLOAD &&
13509  LD->getExtensionType() != ISD::EXTLOAD))
13510  return SDValue();
13511 
13512  SDValue LoadOps[] = {
13513  LD->getChain(), LD->getBasePtr(),
13514  DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13515 
13516  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13517  DAG.getVTList(MVT::v1i128, MVT::Other),
13518  LoadOps, MemoryType, LD->getMemOperand());
13519 }
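// The shape matched above, sketched in DAG form:
//   (v1i128 (build_vector (i128 (zextload <i8/i16/i32/i64> @addr))))
// becomes a PPCISD::LXVRZX memory intrinsic node carrying the element width,
// which is later matched to a Load VSX Vector Rightmost (lxvr*x) instruction.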
13520 
13521 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13522  DAGCombinerInfo &DCI) const {
13523  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13524  "Should be called with a BUILD_VECTOR node");
13525 
13526  SelectionDAG &DAG = DCI.DAG;
13527  SDLoc dl(N);
13528 
13529  if (!Subtarget.hasVSX())
13530  return SDValue();
13531 
13532  // The target independent DAG combiner will leave a build_vector of
13533  // float-to-int conversions intact. We can generate MUCH better code for
13534  // a float-to-int conversion of a vector of floats.
13535  SDValue FirstInput = N->getOperand(0);
13536  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13537  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13538  if (Reduced)
13539  return Reduced;
13540  }
13541 
13542  // If we're building a vector out of consecutive loads, just load that
13543  // vector type.
13544  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13545  if (Reduced)
13546  return Reduced;
13547 
13548  // If we're building a vector out of extended elements from another vector
13549  // we have P9 vector integer extend instructions. The code assumes legal
13550  // input types (i.e. it can't handle things like v4i16) so do not run before
13551  // legalization.
13552  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13553  Reduced = combineBVOfVecSExt(N, DAG);
13554  if (Reduced)
13555  return Reduced;
13556  }
13557 
13558  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13559  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13560  // is a load from <valid narrow width> to i128.
13561  if (Subtarget.isISA3_1()) {
13562  SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
13563  if (BVOfZLoad)
13564  return BVOfZLoad;
13565  }
13566 
13567  if (N->getValueType(0) != MVT::v2f64)
13568  return SDValue();
13569 
13570  // Looking for:
13571  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
13572  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13573  FirstInput.getOpcode() != ISD::UINT_TO_FP)
13574  return SDValue();
13575  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13576  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13577  return SDValue();
13578  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13579  return SDValue();
13580 
13581  SDValue Ext1 = FirstInput.getOperand(0);
13582  SDValue Ext2 = N->getOperand(1).getOperand(0);
13583  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13584  Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13585  return SDValue();
13586 
13587  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
13588  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
13589  if (!Ext1Op || !Ext2Op)
13590  return SDValue();
13591  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13592  Ext1.getOperand(0) != Ext2.getOperand(0))
13593  return SDValue();
13594 
13595  int FirstElem = Ext1Op->getZExtValue();
13596  int SecondElem = Ext2Op->getZExtValue();
13597  int SubvecIdx;
13598  if (FirstElem == 0 && SecondElem == 1)
13599  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13600  else if (FirstElem == 2 && SecondElem == 3)
13601  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13602  else
13603  return SDValue();
13604 
13605  SDValue SrcVec = Ext1.getOperand(0);
13606  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13607  PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
13608  return DAG.getNode(NodeType, dl, MVT::v2f64,
13609  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
13610 }
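// The final pattern above, illustrated (a sketch): on little-endian targets,
//   (v2f64 (build_vector (sint_to_fp (extractelt %v:v4i32, 0)),
//                        (sint_to_fp (extractelt %v:v4i32, 1))))
// becomes (PPCISD::SINT_VEC_TO_FP %v, 1), converting a whole subvector with
// one vector conversion instead of two scalar ones.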
13611 
13612 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
13613  DAGCombinerInfo &DCI) const {
13614  assert((N->getOpcode() == ISD::SINT_TO_FP ||
13615  N->getOpcode() == ISD::UINT_TO_FP) &&
13616  "Need an int -> FP conversion node here");
13617 
13618  if (useSoftFloat() || !Subtarget.has64BitSupport())
13619  return SDValue();
13620 
13621  SelectionDAG &DAG = DCI.DAG;
13622  SDLoc dl(N);
13623  SDValue Op(N, 0);
13624 
13625  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
13626  // from the hardware.
13627  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
13628  return SDValue();
13629  if (!Op.getOperand(0).getValueType().isSimple())
13630  return SDValue();
13631  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
13632  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
13633  return SDValue();
13634 
13635  SDValue FirstOperand(Op.getOperand(0));
13636  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
13637  (FirstOperand.getValueType() == MVT::i8 ||
13638  FirstOperand.getValueType() == MVT::i16);
13639  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
13640  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
13641  bool DstDouble = Op.getValueType() == MVT::f64;
13642  unsigned ConvOp = Signed ?
13643  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
13644  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
13645  SDValue WidthConst =
13646  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
13647  dl, false);
13648  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
13649  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
13650  SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
13651  DAG.getVTList(MVT::f64, MVT::Other),
13652  Ops, MVT::i8, LDN->getMemOperand());
13653 
13654  // For signed conversion, we need to sign-extend the value in the VSR
13655  if (Signed) {
13656  SDValue ExtOps[] = { Ld, WidthConst };
13657  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
13658  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
13659  } else
13660  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
13661  }
13662 
13663 
13664  // For i32 intermediate values, unfortunately, the conversion functions
13665  // leave the upper 32 bits of the value undefined. Within the set of
13666  // scalar instructions, we have no method for zero- or sign-extending the
13667  // value. Thus, we cannot handle i32 intermediate values here.
13668  if (Op.getOperand(0).getValueType() == MVT::i32)
13669  return SDValue();
13670 
13671  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
13672  "UINT_TO_FP is supported only with FPCVT");
13673 
13674  // If we have FCFIDS, then use it when converting to single-precision.
13675  // Otherwise, convert to double-precision and then round.
13676  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13677  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
13678  : PPCISD::FCFIDS)
13679  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
13680  : PPCISD::FCFID);
13681  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13682  ? MVT::f32
13683  : MVT::f64;
13684 
13685  // If we're converting from a float, to an int, and back to a float again,
13686  // then we don't need the store/load pair at all.
13687  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
13688  Subtarget.hasFPCVT()) ||
13689  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
13690  SDValue Src = Op.getOperand(0).getOperand(0);
13691  if (Src.getValueType() == MVT::f32) {
13692  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
13693  DCI.AddToWorklist(Src.getNode());
13694  } else if (Src.getValueType() != MVT::f64) {
13695  // Make sure that we don't pick up a ppc_fp128 source value.
13696  return SDValue();
13697  }
13698 
13699  unsigned FCTOp =
13700  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
13701  PPCISD::FCTIDUZ;
13702 
13703  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
13704  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
13705 
13706  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
13707  FP = DAG.getNode(ISD::FP_ROUND, dl,
13708  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
13709  DCI.AddToWorklist(FP.getNode());
13710  }
13711 
13712  return FP;
13713  }
13714 
13715  return SDValue();
13716 }
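// Round-trip example for the combine above (a sketch): for an f64 value %f,
//   (sint_to_fp (fp_to_sint %f))
// becomes (PPCISD::FCFID (PPCISD::FCTIDZ %f)), two register-to-register
// conversions with no store/load pair through memory.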
13717 
13718 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13719 // builtins) into loads with swaps.
13720 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
13721  DAGCombinerInfo &DCI) const {
13722  SelectionDAG &DAG = DCI.DAG;
13723  SDLoc dl(N);
13724  SDValue Chain;
13725  SDValue Base;
13726  MachineMemOperand *MMO;
13727 
13728  switch (N->getOpcode()) {
13729  default:
13730  llvm_unreachable("Unexpected opcode for little endian VSX load");
13731  case ISD::LOAD: {
13732  LoadSDNode *LD = cast<LoadSDNode>(N);
13733  Chain = LD->getChain();
13734  Base = LD->getBasePtr();
13735  MMO = LD->getMemOperand();
13736  // If the MMO suggests this isn't a load of a full vector, leave
13737  // things alone. For a built-in, we have to make the change for
13738  // correctness, so if there is a size problem that will be a bug.
13739  if (MMO->getSize() < 16)
13740  return SDValue();
13741  break;
13742  }
13743  case ISD::INTRINSIC_W_CHAIN: {
13744  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13745  Chain = Intrin->getChain();
13746  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13747  // us what we want. Get operand 2 instead.
13748  Base = Intrin->getOperand(2);
13749  MMO = Intrin->getMemOperand();
13750  break;
13751  }
13752  }
13753 
13754  MVT VecTy = N->getValueType(0).getSimpleVT();
13755 
13756  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13757  // aligned and the type is a vector with elements up to 4 bytes.
13758  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13759  VecTy.getScalarSizeInBits() <= 32) {
13760  return SDValue();
13761  }
13762 
13763  SDValue LoadOps[] = { Chain, Base };
13764  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13765  DAG.getVTList(MVT::v2f64, MVT::Other),
13766  LoadOps, MVT::v2f64, MMO);
13767 
13768  DCI.AddToWorklist(Load.getNode());
13769  Chain = Load.getValue(1);
13770  SDValue Swap = DAG.getNode(
13771  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13772  DCI.AddToWorklist(Swap.getNode());
13773 
13774  // Add a bitcast if the resulting load type doesn't match v2f64.
13775  if (VecTy != MVT::v2f64) {
13776  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13777  DCI.AddToWorklist(N.getNode());
13778  // Package {bitcast value, swap's chain} to match Load's shape.
13779  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13780  N, Swap.getValue(1));
13781  }
13782 
13783  return Swap;
13784 }
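// Net effect (a sketch): on little-endian subtargets a 16-byte VSX load of,
// say, v2f64 becomes
//   (v2f64 (PPCISD::XXSWAPD (PPCISD::LXVD2X chain, base)))
// and a later swap-removal pass may cancel matching XXSWAPD pairs.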
13785 
13786 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13787 // builtins) into stores with swaps.
13788 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13789  DAGCombinerInfo &DCI) const {
13790  SelectionDAG &DAG = DCI.DAG;
13791  SDLoc dl(N);
13792  SDValue Chain;
13793  SDValue Base;
13794  unsigned SrcOpnd;
13795  MachineMemOperand *MMO;
13796 
13797  switch (N->getOpcode()) {
13798  default:
13799  llvm_unreachable("Unexpected opcode for little endian VSX store");
13800  case ISD::STORE: {
13801  StoreSDNode *ST = cast<StoreSDNode>(N);
13802  Chain = ST->getChain();
13803  Base = ST->getBasePtr();
13804  MMO = ST->getMemOperand();
13805  SrcOpnd = 1;
13806  // If the MMO suggests this isn't a store of a full vector, leave
13807  // things alone. For a built-in, we have to make the change for
13808  // correctness, so if there is a size problem that will be a bug.
13809  if (MMO->getSize() < 16)
13810  return SDValue();
13811  break;
13812  }
13813  case ISD::INTRINSIC_VOID: {
13814  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13815  Chain = Intrin->getChain();
13816  // Intrin->getBasePtr() oddly does not get what we want.
13817  Base = Intrin->getOperand(3);
13818  MMO = Intrin->getMemOperand();
13819  SrcOpnd = 2;
13820  break;
13821  }
13822  }
13823 
13824  SDValue Src = N->getOperand(SrcOpnd);
13825  MVT VecTy = Src.getValueType().getSimpleVT();
13826 
13827  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13828  // aligned and the type is a vector with elements up to 4 bytes.
13829  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13830  VecTy.getScalarSizeInBits() <= 32) {
13831  return SDValue();
13832  }
13833 
13834  // All stores are done as v2f64 and possible bit cast.
13835  if (VecTy != MVT::v2f64) {
13836  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13837  DCI.AddToWorklist(Src.getNode());
13838  }
13839 
13840  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13841  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13842  DCI.AddToWorklist(Swap.getNode());
13843  Chain = Swap.getValue(1);
13844  SDValue StoreOps[] = { Chain, Swap, Base };
13845  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13846  DAG.getVTList(MVT::Other),
13847  StoreOps, VecTy, MMO);
13848  DCI.AddToWorklist(Store.getNode());
13849  return Store;
13850 }
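// Illustrative sketch (hypothetical DAG): the store-side mirror of the
// load expansion above. A v2i64 store
//   t3: ch = store<(store 16 into %p)> t0, t2, t1
// becomes, roughly,
//   t4: v2f64 = bitcast t2
//   t5: v2f64,ch = PPCISD::XXSWAPD t0, t4
//   t6: ch = PPCISD::STXVD2X t5:1, t5, t1
// i.e. the value is swapped in a register first and then stored with
// stxvd2x, which writes the doublewords in the opposite order.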
13851 
13852 // Handle DAG combine for STORE (FP_TO_INT F).
13853 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13854  DAGCombinerInfo &DCI) const {
13855 
13856  SelectionDAG &DAG = DCI.DAG;
13857  SDLoc dl(N);
13858  unsigned Opcode = N->getOperand(1).getOpcode();
13859 
13860  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13861  && "Not a FP_TO_INT Instruction!");
13862 
13863  SDValue Val = N->getOperand(1).getOperand(0);
13864  EVT Op1VT = N->getOperand(1).getValueType();
13865  EVT ResVT = Val.getValueType();
13866 
13867  if (!isTypeLegal(ResVT))
13868  return SDValue();
13869 
13870  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13871  bool ValidTypeForStoreFltAsInt =
13872  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13873  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13874 
13875  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
13876  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13877  return SDValue();
13878 
13879  // Extend f32 values to f64
13880  if (ResVT.getScalarSizeInBits() == 32) {
13881  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13882  DCI.AddToWorklist(Val.getNode());
13883  }
13884 
13885  // Set signed or unsigned conversion opcode.
13886  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13887  PPCISD::FP_TO_SINT_IN_VSR :
13888  PPCISD::FP_TO_UINT_IN_VSR;
13889 
13890  Val = DAG.getNode(ConvOpcode,
13891  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13892  DCI.AddToWorklist(Val.getNode());
13893 
13894  // Set number of bytes being converted.
13895  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13896  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13897  DAG.getIntPtrConstant(ByteSize, dl, false),
13898  DAG.getValueType(Op1VT) };
13899 
13900  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13901  DAG.getVTList(MVT::Other), Ops,
13902  cast<StoreSDNode>(N)->getMemoryVT(),
13903  cast<StoreSDNode>(N)->getMemOperand());
13904 
13905  DCI.AddToWorklist(Val.getNode());
13906  return Val;
13907 }
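// Worked example (a sketch, assuming a Power8+ target): for IR such as
//   store i32 (fptosi double %d to i32), i32* %p
// this combine keeps the conversion result in a VSR and stores it
// directly (FP_TO_SINT_IN_VSR followed by ST_VSR_SCAL_INT with
// ByteSize == 4), avoiding the round trip through a GPR that a separate
// convert/move/store sequence would take.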
13908 
13909 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13910  // Check that the source of the element keeps flipping
13911  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
13912  bool PrevElemFromFirstVec = Mask[0] < NumElts;
13913  for (int i = 1, e = Mask.size(); i < e; i++) {
13914  if (PrevElemFromFirstVec && Mask[i] < NumElts)
13915  return false;
13916  if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13917  return false;
13918  PrevElemFromFirstVec = !PrevElemFromFirstVec;
13919  }
13920  return true;
13921 }
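// Example (illustrative masks for a v4i32 shuffle, where indices 0-3
// select from the first operand and 4-7 from the second):
//   <0, 4, 1, 5> -> true  (sources alternate LHS, RHS, LHS, RHS)
//   <4, 0, 5, 1> -> true  (the alternation may start with either source)
//   <0, 1, 4, 5> -> false (two consecutive elements from the LHS)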
13922 
13923 static bool isSplatBV(SDValue Op) {
13924  if (Op.getOpcode() != ISD::BUILD_VECTOR)
13925  return false;
13926  SDValue FirstOp;
13927 
13928  // Find first non-undef input.
13929  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13930  FirstOp = Op.getOperand(i);
13931  if (!FirstOp.isUndef())
13932  break;
13933  }
13934 
13935  // All inputs are undef or the same as the first non-undef input.
13936  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13937  if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13938  return false;
13939  return true;
13940 }
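// Example (illustrative): both of these BUILD_VECTORs count as splats,
//   (build_vector 7, 7, 7, 7)
//   (build_vector undef, 7, undef, 7)
// while (build_vector 7, 7, 7, 8) does not, because a non-undef input
// differs from the first non-undef one.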
13941 
13942 static SDValue isScalarToVec(SDValue Op) {
13943  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13944  return Op;
13945  if (Op.getOpcode() != ISD::BITCAST)
13946  return SDValue();
13947  Op = Op.getOperand(0);
13948  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13949  return Op;
13950  return SDValue();
13951 }
13952 
13953 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
13954  int LHSMaxIdx, int RHSMinIdx,
13955  int RHSMaxIdx, int HalfVec) {
13956  for (int i = 0, e = ShuffV.size(); i < e; i++) {
13957  int Idx = ShuffV[i];
13958  if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13959  ShuffV[i] += HalfVec;
13960  }
13961 }
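// Example (hypothetical values): with LHSMaxIdx == 2, RHSMinIdx == 8,
// RHSMaxIdx == 10 and HalfVec == 4, the mask <0, 9, 3, 12> becomes
// <4, 13, 3, 12>: entries 0 and 9 fall inside the two adjusted ranges
// and are shifted by HalfVec, while 3 and 12 are left untouched.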
13962 
13963 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13964 // the original is:
13965 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13966 // In such a case, just change the shuffle mask to extract the element
13967 // from the permuted index.
13968 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
13969  SDLoc dl(OrigSToV);
13970  EVT VT = OrigSToV.getValueType();
13971  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13972  "Expecting a SCALAR_TO_VECTOR here");
13973  SDValue Input = OrigSToV.getOperand(0);
13974 
13975  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13976  ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13977  SDValue OrigVector = Input.getOperand(0);
13978 
13979  // Can't handle non-const element indices or different vector types
13980  // for the input to the extract and the output of the scalar_to_vector.
13981  if (Idx && VT == OrigVector.getValueType()) {
13982  SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
13983  NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13984  return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13985  }
13986  }
13987  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13988  OrigSToV.getOperand(0));
13989 }
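// Example (sketch): for (v4i32 (scalar_to_vector (i32 (extract_elt
// (v4i32 %a), 2)))), the extract feeding the scalar_to_vector lets us
// emit vector_shuffle<-1,-1,2,-1> %a, %a, which leaves %a in place and
// simply reads element 2 into the permuted slot NumElts / 2 == 2, rather
// than creating a SCALAR_TO_VECTOR_PERMUTED node.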
13990 
13991 // On little endian subtargets, combine shuffles such as:
13992 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13993 // into:
13994 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13995 // because the latter can be matched to a single instruction merge.
13996 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13997 // to put the value into element zero. Adjust the shuffle mask so that the
13998 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
13999 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14000  SelectionDAG &DAG) const {
14001  SDValue LHS = SVN->getOperand(0);
14002  SDValue RHS = SVN->getOperand(1);
14003  auto Mask = SVN->getMask();
14004  int NumElts = LHS.getValueType().getVectorNumElements();
14005  SDValue Res(SVN, 0);
14006  SDLoc dl(SVN);
14007 
14008  // None of these combines are useful on big endian systems since the ISA
14009  // already has a big endian bias.
14010  if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14011  return Res;
14012 
14013  // If this is not a shuffle of a shuffle and the first element comes from
14014  // the second vector, canonicalize to the commuted form. This will make it
14015  // more likely to match one of the single instruction patterns.
14016  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14017  RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14018  std::swap(LHS, RHS);
14019  Res = DAG.getCommutedVectorShuffle(*SVN);
14020  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14021  }
14022 
14023  // Adjust the shuffle mask if either input vector comes from a
14024  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14025  // form (to prevent the need for a swap).
14026  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14027  SDValue SToVLHS = isScalarToVec(LHS);
14028  SDValue SToVRHS = isScalarToVec(RHS);
14029  if (SToVLHS || SToVRHS) {
14030  int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14031  : SToVRHS.getValueType().getVectorNumElements();
14032  int NumEltsOut = ShuffV.size();
14033 
14034  // Initially assume that neither input is permuted. These will be adjusted
14035  // accordingly if either input is.
14036  int LHSMaxIdx = -1;
14037  int RHSMinIdx = -1;
14038  int RHSMaxIdx = -1;
14039  int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14040 
14041  // Get the permuted scalar to vector nodes for the source(s) that come from
14042  // ISD::SCALAR_TO_VECTOR.
14043  if (SToVLHS) {
14044  // Set up the values for the shuffle vector fixup.
14045  LHSMaxIdx = NumEltsOut / NumEltsIn;
14046  SToVLHS = getSToVPermuted(SToVLHS, DAG);
14047  if (SToVLHS.getValueType() != LHS.getValueType())
14048  SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14049  LHS = SToVLHS;
14050  }
14051  if (SToVRHS) {
14052  RHSMinIdx = NumEltsOut;
14053  RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14054  SToVRHS = getSToVPermuted(SToVRHS, DAG);
14055  if (SToVRHS.getValueType() != RHS.getValueType())
14056  SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14057  RHS = SToVRHS;
14058  }
14059 
14060  // Fix up the shuffle mask to reflect where the desired element actually is.
14061  // The minimum and maximum indices that correspond to element zero for both
14062  // the LHS and RHS are computed and will control which shuffle mask entries
14063  // are to be changed. For example, if the RHS is permuted, any shuffle mask
14064  // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14065  // HalfVec to refer to the corresponding element in the permuted vector.
14066  fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14067  HalfVec);
14068  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14069 
14070  // We may have simplified away the shuffle. We won't be able to do anything
14071  // further with it here.
14072  if (!isa<ShuffleVectorSDNode>(Res))
14073  return Res;
14074  Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14075  }
14076 
14077  // The common case after we commuted the shuffle is that the RHS is a splat
14078  // and we have elements coming in from the splat at indices that are not
14079  // conducive to using a merge.
14080  // Example:
14081  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14082  if (!isSplatBV(RHS))
14083  return Res;
14084 
14085  // We are looking for a mask such that all even elements are from
14086  // one vector and all odd elements from the other.
14087  if (!isAlternatingShuffMask(Mask, NumElts))
14088  return Res;
14089 
14090  // Adjust the mask so we are pulling in the same index from the splat
14091  // as the index from the interesting vector in consecutive elements.
14092  // Example (even elements from first vector):
14093  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14094  if (Mask[0] < NumElts)
14095  for (int i = 1, e = Mask.size(); i < e; i += 2)
14096  ShuffV[i] = (ShuffV[i - 1] + NumElts);
14097  // Example (odd elements from first vector):
14098  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14099  else
14100  for (int i = 0, e = Mask.size(); i < e; i += 2)
14101  ShuffV[i] = (ShuffV[i + 1] + NumElts);
14102 
14103  // If the RHS has undefs, we need to remove them since we may have created
14104  // a shuffle that adds those instead of the splat value.
14105  SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14106  RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14107 
14108  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14109  return Res;
14110 }
14111 
14112 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14113  LSBaseSDNode *LSBase,
14114  DAGCombinerInfo &DCI) const {
14115  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14116  "Not a reverse memop pattern!");
14117 
14118  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14119  auto Mask = SVN->getMask();
14120  int i = 0;
14121  auto I = Mask.rbegin();
14122  auto E = Mask.rend();
14123 
14124  for (; I != E; ++I) {
14125  if (*I != i)
14126  return false;
14127  i++;
14128  }
14129  return true;
14130  };
14131 
14132  SelectionDAG &DAG = DCI.DAG;
14133  EVT VT = SVN->getValueType(0);
14134 
14135  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14136  return SDValue();
14137 
14138  // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
14139  // See the comment in PPCVSXSwapRemoval.cpp.
14140  // This combine conflicts with that optimization, so we don't do it pre-P9.
14141  if (!Subtarget.hasP9Vector())
14142  return SDValue();
14143 
14144  if (!IsElementReverse(SVN))
14145  return SDValue();
14146 
14147  if (LSBase->getOpcode() == ISD::LOAD) {
14148  SDLoc dl(SVN);
14149  SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14150  return DAG.getMemIntrinsicNode(
14151  PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14152  LSBase->getMemoryVT(), LSBase->getMemOperand());
14153  }
14154 
14155  if (LSBase->getOpcode() == ISD::STORE) {
14156  SDLoc dl(LSBase);
14157  SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14158  LSBase->getBasePtr()};
14159  return DAG.getMemIntrinsicNode(
14160  PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14161  LSBase->getMemoryVT(), LSBase->getMemOperand());
14162  }
14163 
14164  llvm_unreachable("Expected a load or store node here");
14165 }
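// Illustrative sketch: on a little-endian Power9 subtarget, a load
// followed by an element reverse, e.g.
//   t2: v4i32,ch = load t0, t1
//   t3: v4i32 = vector_shuffle<3,2,1,0> t2, undef
// is folded into a single big-endian-order access
//   t4: v4i32,ch = PPCISD::LOAD_VEC_BE t0, t1
// and the mirror-image store pattern uses PPCISD::STORE_VEC_BE.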
14166 
14167 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14168  DAGCombinerInfo &DCI) const {
14169  SelectionDAG &DAG = DCI.DAG;
14170  SDLoc dl(N);
14171  switch (N->getOpcode()) {
14172  default: break;
14173  case ISD::ADD:
14174  return combineADD(N, DCI);
14175  case ISD::SHL:
14176  return combineSHL(N, DCI);
14177  case ISD::SRA:
14178  return combineSRA(N, DCI);
14179  case ISD::SRL:
14180  return combineSRL(N, DCI);
14181  case ISD::MUL:
14182  return combineMUL(N, DCI);
14183  case ISD::FMA:
14184  case PPCISD::FNMSUB:
14185  return combineFMALike(N, DCI);
14186  case PPCISD::SHL:
14187  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14188  return N->getOperand(0);
14189  break;
14190  case PPCISD::SRL:
14191  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14192  return N->getOperand(0);
14193  break;
14194  case PPCISD::SRA:
14195  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14196  if (C->isNullValue() || // 0 >>s V -> 0.
14197  C->isAllOnesValue()) // -1 >>s V -> -1.
14198  return N->getOperand(0);
14199  }
14200  break;
14201  case ISD::SIGN_EXTEND:
14202  case ISD::ZERO_EXTEND:
14203  case ISD::ANY_EXTEND:
14204  return DAGCombineExtBoolTrunc(N, DCI);
14205  case ISD::TRUNCATE:
14206  return combineTRUNCATE(N, DCI);
14207  case ISD::SETCC:
14208  if (SDValue CSCC = combineSetCC(N, DCI))
14209  return CSCC;
14210  LLVM_FALLTHROUGH;
14211  case ISD::SELECT_CC:
14212  return DAGCombineTruncBoolExt(N, DCI);
14213  case ISD::SINT_TO_FP:
14214  case ISD::UINT_TO_FP:
14215  return combineFPToIntToFP(N, DCI);
14216  case ISD::VECTOR_SHUFFLE:
14217  if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14218  LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14219  return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14220  }
14221  return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14222  case ISD::STORE: {
14223 
14224  EVT Op1VT = N->getOperand(1).getValueType();
14225  unsigned Opcode = N->getOperand(1).getOpcode();
14226 
14227  if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14228  SDValue Val= combineStoreFPToInt(N, DCI);
14229  if (Val)
14230  return Val;
14231  }
14232 
14233  if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14234  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14235  SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14236  if (Val)
14237  return Val;
14238  }
14239 
14240  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14241  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14242  N->getOperand(1).getNode()->hasOneUse() &&
14243  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14244  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14245 
14246  // STBRX can only handle simple types and it makes no sense to store less
14247  // than two bytes in byte-reversed order.
14248  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14249  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14250  break;
14251 
14252  SDValue BSwapOp = N->getOperand(1).getOperand(0);
14253  // Do an any-extend to 32-bits if this is a half-word input.
14254  if (BSwapOp.getValueType() == MVT::i16)
14255  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14256 
14257  // If the type of the BSWAP operand is wider than the stored memory width,
14258  // it needs to be shifted right before STBRX.
14259  if (Op1VT.bitsGT(mVT)) {
14260  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14261  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14262  DAG.getConstant(Shift, dl, MVT::i32));
14263  // Need to truncate if this is a bswap of i64 stored as i32/i16.
14264  if (Op1VT == MVT::i64)
14265  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14266  }
14267 
14268  SDValue Ops[] = {
14269  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14270  };
14271  return
14272  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14273  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14274  cast<StoreSDNode>(N)->getMemOperand());
14275  }
14276 
14277  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
14278  // This increases the chance of CSE'ing the constant construction.
14279  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14280  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14281  // Need to sign-extend to 64 bits to handle negative values.
14282  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14283  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14284  MemVT.getSizeInBits());
14285  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14286 
14287  // DAG.getTruncStore() can't be used here because it doesn't accept
14288  // the general (base + offset) addressing mode.
14289  // So we use UpdateNodeOperands and setTruncatingStore instead.
14290  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14291  N->getOperand(3));
14292  cast<StoreSDNode>(N)->setTruncatingStore(true);
14293  return SDValue(N, 0);
14294  }
14295 
14296  // For little endian, VSX stores require generating xxswapd/stxvd2x.
14297  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14298  if (Op1VT.isSimple()) {
14299  MVT StoreVT = Op1VT.getSimpleVT();
14300  if (Subtarget.needsSwapsForVSXMemOps() &&
14301  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14302  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14303  return expandVSXStoreForLE(N, DCI);
14304  }
14305  break;
14306  }
14307  case ISD::LOAD: {
14308  LoadSDNode *LD = cast<LoadSDNode>(N);
14309  EVT VT = LD->getValueType(0);
14310 
14311  // For little endian, VSX loads require generating lxvd2x/xxswapd.
14312  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14313  if (VT.isSimple()) {
14314  MVT LoadVT = VT.getSimpleVT();
14315  if (Subtarget.needsSwapsForVSXMemOps() &&
14316  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14317  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14318  return expandVSXLoadForLE(N, DCI);
14319  }
14320 
14321  // We sometimes end up with a 64-bit integer load, from which we extract
14322  // two single-precision floating-point numbers. This happens with
14323  // std::complex<float>, and other similar structures, because of the way we
14324  // canonicalize structure copies. However, if we lack direct moves,
14325  // then the final bitcasts from the extracted integer values to the
14326  // floating-point numbers turn into store/load pairs. Even with direct moves,
14327  // just loading the two floating-point numbers is likely better.
14328  auto ReplaceTwoFloatLoad = [&]() {
14329  if (VT != MVT::i64)
14330  return false;
14331 
14332  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14333  LD->isVolatile())
14334  return false;
14335 
14336  // We're looking for a sequence like this:
14337  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14338  // t16: i64 = srl t13, Constant:i32<32>
14339  // t17: i32 = truncate t16
14340  // t18: f32 = bitcast t17
14341  // t19: i32 = truncate t13
14342  // t20: f32 = bitcast t19
14343 
14344  if (!LD->hasNUsesOfValue(2, 0))
14345  return false;
14346 
14347  auto UI = LD->use_begin();
14348  while (UI.getUse().getResNo() != 0) ++UI;
14349  SDNode *Trunc = *UI++;
14350  while (UI.getUse().getResNo() != 0) ++UI;
14351  SDNode *RightShift = *UI;
14352  if (Trunc->getOpcode() != ISD::TRUNCATE)
14353  std::swap(Trunc, RightShift);
14354 
14355  if (Trunc->getOpcode() != ISD::TRUNCATE ||
14356  Trunc->getValueType(0) != MVT::i32 ||
14357  !Trunc->hasOneUse())
14358  return false;
14359  if (RightShift->getOpcode() != ISD::SRL ||
14360  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14361  RightShift->getConstantOperandVal(1) != 32 ||
14362  !RightShift->hasOneUse())
14363  return false;
14364 
14365  SDNode *Trunc2 = *RightShift->use_begin();
14366  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14367  Trunc2->getValueType(0) != MVT::i32 ||
14368  !Trunc2->hasOneUse())
14369  return false;
14370 
14371  SDNode *Bitcast = *Trunc->use_begin();
14372  SDNode *Bitcast2 = *Trunc2->use_begin();
14373 
14374  if (Bitcast->getOpcode() != ISD::BITCAST ||
14375  Bitcast->getValueType(0) != MVT::f32)
14376  return false;
14377  if (Bitcast2->getOpcode() != ISD::BITCAST ||
14378  Bitcast2->getValueType(0) != MVT::f32)
14379  return false;
14380 
14381  if (Subtarget.isLittleEndian())
14382  std::swap(Bitcast, Bitcast2);
14383 
14384  // Bitcast has the second float (in memory-layout order) and Bitcast2
14385  // has the first one.
14386 
14387  SDValue BasePtr = LD->getBasePtr();
14388  if (LD->isIndexed()) {
14389  assert(LD->getAddressingMode() == ISD::PRE_INC &&
14390  "Non-pre-inc AM on PPC?");
14391  BasePtr =
14392  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14393  LD->getOffset());
14394  }
14395 
14396  auto MMOFlags =
14397  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14398  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14399  LD->getPointerInfo(), LD->getAlignment(),
14400  MMOFlags, LD->getAAInfo());
14401  SDValue AddPtr =
14402  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14403  BasePtr, DAG.getIntPtrConstant(4, dl));
14404  SDValue FloatLoad2 = DAG.getLoad(
14405  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14406  LD->getPointerInfo().getWithOffset(4),
14407  MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14408 
14409  if (LD->isIndexed()) {
14410  // Note that DAGCombine should re-form any pre-increment load(s) from
14411  // what is produced here if that makes sense.
14412  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14413  }
14414 
14415  DCI.CombineTo(Bitcast2, FloatLoad);
14416  DCI.CombineTo(Bitcast, FloatLoad2);
14417 
14418  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14419  SDValue(FloatLoad2.getNode(), 1));
14420  return true;
14421  };
14422 
14423  if (ReplaceTwoFloatLoad())
14424  return SDValue(N, 0);
14425 
14426  EVT MemVT = LD->getMemoryVT();
14427  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14428  Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14429  if (LD->isUnindexed() && VT.isVector() &&
14430  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14431  // P8 and later hardware should just use LOAD.
14432  !Subtarget.hasP8Vector() &&
14433  (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14434  VT == MVT::v4f32))) &&
14435  LD->getAlign() < ABIAlignment) {
14436  // This is a type-legal unaligned Altivec load.
14437  SDValue Chain = LD->getChain();
14438  SDValue Ptr = LD->getBasePtr();
14439  bool isLittleEndian = Subtarget.isLittleEndian();
14440 
14441  // This implements the loading of unaligned vectors as described in
14442  // the venerable Apple Velocity Engine overview. Specifically:
14443  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14444  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14445  //
14446  // The general idea is to expand a sequence of one or more unaligned
14447  // loads into an alignment-based permutation-control instruction (lvsl
14448  // or lvsr), a series of regular vector loads (which always truncate
14449  // their input address to an aligned address), and a series of
14450  // permutations. The results of these permutations are the requested
14451  // loaded values. The trick is that the last "extra" load is not taken
14452  // from the address you might suspect (sizeof(vector) bytes after the
14453  // last requested load), but rather sizeof(vector) - 1 bytes after the
14454  // last requested vector. The point of this is to avoid a page fault if
14455  // the base address happened to be aligned. This works because if the
14456  // base address is aligned, then adding less than a full vector length
14457  // will cause the last vector in the sequence to be (re)loaded.
14458  // Otherwise, the next vector will be fetched as you might suspect was
14459  // necessary.
14460 
14461  // We might be able to reuse the permutation generation from
14462  // a different base address offset from this one by an aligned amount.
14463  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14464  // optimization later.
14465  Intrinsic::ID Intr, IntrLD, IntrPerm;
14466  MVT PermCntlTy, PermTy, LDTy;
14467  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14468  : Intrinsic::ppc_altivec_lvsl;
14469  IntrLD = Intrinsic::ppc_altivec_lvx;
14470  IntrPerm = Intrinsic::ppc_altivec_vperm;
14471  PermCntlTy = MVT::v16i8;
14472  PermTy = MVT::v4i32;
14473  LDTy = MVT::v4i32;
14474 
14475  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14476 
14477  // Create the new MMO for the new base load. It is like the original MMO,
14478  // but represents an area in memory almost twice the vector size centered
14479  // on the original address. If the address is unaligned, we might start
14480  // reading up to (sizeof(vector)-1) bytes below the address of the
14481  // original unaligned load.
14482  MachineFunction &MF = DAG.getMachineFunction();
14483  MachineMemOperand *BaseMMO =
14484  MF.getMachineMemOperand(LD->getMemOperand(),
14485  -(long)MemVT.getStoreSize()+1,
14486  2*MemVT.getStoreSize()-1);
14487 
14488  // Create the new base load.
14489  SDValue LDXIntID =
14490  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14491  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14492  SDValue BaseLoad =
14493  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14494  DAG.getVTList(PermTy, MVT::Other),
14495  BaseLoadOps, LDTy, BaseMMO);
14496 
14497  // Note that the value of IncOffset (which is provided to the next
14498  // load's pointer info offset value, and thus used to calculate the
14499  // alignment), and the value of IncValue (which is actually used to
14500  // increment the pointer value) are different! This is because we
14501  // require the next load to appear to be aligned, even though it
14502  // is actually offset from the base pointer by a lesser amount.
14503  int IncOffset = VT.getSizeInBits() / 8;
14504  int IncValue = IncOffset;
14505 
14506  // Walk (both up and down) the chain looking for another load at the real
14507  // (aligned) offset (the alignment of the other load does not matter in
14508  // this case). If found, then do not use the offset reduction trick, as
14509  // that will prevent the loads from being later combined (as they would
14510  // otherwise be duplicates).
14511  if (!findConsecutiveLoad(LD, DAG))
14512  --IncValue;
14513 
14514  SDValue Increment =
14515  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14516  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14517 
14518  MachineMemOperand *ExtraMMO =
14519  MF.getMachineMemOperand(LD->getMemOperand(),
14520  1, 2*MemVT.getStoreSize()-1);
14521  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14522  SDValue ExtraLoad =
14523  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14524  DAG.getVTList(PermTy, MVT::Other),
14525  ExtraLoadOps, LDTy, ExtraMMO);
14526 
14527  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14528  BaseLoad.getValue(1), ExtraLoad.getValue(1));
14529 
14530  // Because vperm has a big-endian bias, we must reverse the order
14531  // of the input vectors and complement the permute control vector
14532  // when generating little endian code. We have already handled the
14533  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14534  // and ExtraLoad here.
14535  SDValue Perm;
14536  if (isLittleEndian)
14537  Perm = BuildIntrinsicOp(IntrPerm,
14538  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14539  else
14540  Perm = BuildIntrinsicOp(IntrPerm,
14541  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14542 
14543  if (VT != PermTy)
14544  Perm = Subtarget.hasAltivec()
14545  ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14546  : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14547  DAG.getTargetConstant(1, dl, MVT::i64));
14548  // second argument is 1 because this rounding
14549  // is always exact.
14550 
14551  // The output of the permutation is our loaded result, the TokenFactor is
14552  // our new chain.
14553  DCI.CombineTo(N, Perm, TF);
14554  return SDValue(N, 0);
14555  }
14556  }
14557  break;
14558  case ISD::INTRINSIC_WO_CHAIN: {
14559  bool isLittleEndian = Subtarget.isLittleEndian();
14560  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14561  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14562  : Intrinsic::ppc_altivec_lvsl);
14563  if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14564  SDValue Add = N->getOperand(1);
14565 
14566  int Bits = 4 /* 16 byte alignment */;
14567 
14568  if (DAG.MaskedValueIsZero(Add->getOperand(1),
14569  APInt::getAllOnesValue(Bits /* alignment */)
14570  .zext(Add.getScalarValueSizeInBits()))) {
14571  SDNode *BasePtr = Add->getOperand(0).getNode();
14572  for (SDNode::use_iterator UI = BasePtr->use_begin(),
14573  UE = BasePtr->use_end();
14574  UI != UE; ++UI) {
14575  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14576  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14577  IID) {
14578  // We've found another LVSL/LVSR, and this address is an aligned
14579  // multiple of that one. The results will be the same, so use the
14580  // one we've just found instead.
14581 
14582  return SDValue(*UI, 0);
14583  }
14584  }
14585  }
14586 
14587  if (isa<ConstantSDNode>(Add->getOperand(1))) {
14588  SDNode *BasePtr = Add->getOperand(0).getNode();
14589  for (SDNode::use_iterator UI = BasePtr->use_begin(),
14590  UE = BasePtr->use_end(); UI != UE; ++UI) {
14591  if (UI->getOpcode() == ISD::ADD &&
14592  isa<ConstantSDNode>(UI->getOperand(1)) &&
14593  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14594  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14595  (1ULL << Bits) == 0) {
14596  SDNode *OtherAdd = *UI;
14597  for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14598  VE = OtherAdd->use_end(); VI != VE; ++VI) {
14599  if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14600  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14601  return SDValue(*VI, 0);
14602  }
14603  }
14604  }
14605  }
14606  }
14607  }
14608 
14609  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14610  // and expose the vabsduw/h/b opportunity for downstream combines.
14611  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14612  (IID == Intrinsic::ppc_altivec_vmaxsw ||
14613  IID == Intrinsic::ppc_altivec_vmaxsh ||
14614  IID == Intrinsic::ppc_altivec_vmaxsb)) {
14615  SDValue V1 = N->getOperand(1);
14616  SDValue V2 = N->getOperand(2);
14617  if ((V1.getSimpleValueType() == MVT::v4i32 ||
14618  V1.getSimpleValueType() == MVT::v8i16 ||
14619  V1.getSimpleValueType() == MVT::v16i8) &&
14620  V1.getSimpleValueType() == V2.getSimpleValueType()) {
14621  // (0-a, a)
14622  if (V1.getOpcode() == ISD::SUB &&
14623  ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14624  V1.getOperand(1) == V2) {
14625  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14626  }
14627  // (a, 0-a)
14628  if (V2.getOpcode() == ISD::SUB &&
14629  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14630  V2.getOperand(1) == V1) {
14631  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14632  }
14633  // (x-y, y-x)
14634  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14635  V1.getOperand(0) == V2.getOperand(1) &&
14636  V1.getOperand(1) == V2.getOperand(0)) {
14637  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14638  }
14639  }
14640  }
14641  }
14642 
14643  break;
14644  case ISD::INTRINSIC_W_CHAIN:
14645  // For little endian, VSX loads require generating lxvd2x/xxswapd.
14646  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14647  if (Subtarget.needsSwapsForVSXMemOps()) {
14648  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14649  default:
14650  break;
14651  case Intrinsic::ppc_vsx_lxvw4x:
14652  case Intrinsic::ppc_vsx_lxvd2x:
14653  return expandVSXLoadForLE(N, DCI);
14654  }
14655  }
14656  break;
14657  case ISD::INTRINSIC_VOID:
14658  // For little endian, VSX stores require generating xxswapd/stxvd2x.
14659  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14660  if (Subtarget.needsSwapsForVSXMemOps()) {
14661  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14662  default:
14663  break;
14664  case Intrinsic::ppc_vsx_stxvw4x:
14665  case Intrinsic::ppc_vsx_stxvd2x:
14666  return expandVSXStoreForLE(N, DCI);
14667  }
14668  }
14669  break;
14670  case ISD::BSWAP:
14671  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14672  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14673  N->getOperand(0).hasOneUse() &&
14674  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14675  (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14676  N->getValueType(0) == MVT::i64))) {
14677  SDValue Load = N->getOperand(0);
14678  LoadSDNode *LD = cast<LoadSDNode>(Load);
14679  // Create the byte-swapping load.
14680  SDValue Ops[] = {
14681  LD->getChain(), // Chain
14682  LD->getBasePtr(), // Ptr
14683  DAG.getValueType(N->getValueType(0)) // VT
14684  };
14685  SDValue BSLoad =
14686  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14687  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14688  MVT::i64 : MVT::i32, MVT::Other),
14689  Ops, LD->getMemoryVT(), LD->getMemOperand());
14690 
14691  // If this is an i16 load, insert the truncate.
14692  SDValue ResVal = BSLoad;
14693  if (N->getValueType(0) == MVT::i16)
14694  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14695 
14696  // First, combine the bswap away. This makes the value produced by the
14697  // load dead.
14698  DCI.CombineTo(N, ResVal);
14699 
14700  // Next, combine the load away, we give it a bogus result value but a real
14701  // chain result. The result value is dead because the bswap is dead.
14702  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14703 
14704  // Return N so it doesn't get rechecked!
14705  return SDValue(N, 0);
14706  }
14707  break;
14708  case PPCISD::VCMP:
14709  // If a VCMP_rec node already exists with exactly the same operands as this
14710  // node, use its result instead of this node (VCMP_rec computes both a CR6
14711  // and a normal output).
14712  //
14713  if (!N->getOperand(0).hasOneUse() &&
14714  !N->getOperand(1).hasOneUse() &&
14715  !N->getOperand(2).hasOneUse()) {
14716 
14717  // Scan all of the users of the LHS, looking for VCMP_rec's that match.
14718  SDNode *VCMPrecNode = nullptr;
14719 
14720  SDNode *LHSN = N->getOperand(0).getNode();
14721  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14722  UI != E; ++UI)
14723  if (UI->getOpcode() == PPCISD::VCMP_rec &&
14724  UI->getOperand(1) == N->getOperand(1) &&
14725  UI->getOperand(2) == N->getOperand(2) &&
14726  UI->getOperand(0) == N->getOperand(0)) {
14727  VCMPrecNode = *UI;
14728  break;
14729  }
14730 
14731  // If there is no VCMP_rec node, or if the flag value has a single use,
14732  // don't transform this.
14733  if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
14734  break;
14735 
14736  // Look at the (necessarily single) use of the flag value. If it has a
14737  // chain, this transformation is more complex. Note that multiple things
14738  // could use the value result, which we should ignore.
14739  SDNode *FlagUser = nullptr;
14740  for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
14741  FlagUser == nullptr; ++UI) {
14742  assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
14743  SDNode *User = *UI;
14744  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14745  if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
14746  FlagUser = User;
14747  break;
14748  }
14749  }
14750  }
14751 
14752  // If the user is a MFOCRF instruction, we know this is safe.
14753  // Otherwise we give up for right now.
14754  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14755  return SDValue(VCMPrecNode, 0);
14756  }
14757  break;
14758  case ISD::BRCOND: {
14759  SDValue Cond = N->getOperand(1);
14760  SDValue Target = N->getOperand(2);
14761 
14762  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14763  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14764  Intrinsic::loop_decrement) {
14765 
14766  // We now need to make the intrinsic dead (it cannot be instruction
14767  // selected).
14768  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14769  assert(Cond.getNode()->hasOneUse() &&
14770  "Counter decrement has more than one use");
14771 
14772  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14773  N->getOperand(0), Target);
14774  }
14775  }
14776  break;
14777  case ISD::BR_CC: {
14778  // If this is a branch on an altivec predicate comparison, lower this so
14779  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
14780  // lowering is done pre-legalize, because the legalizer lowers the predicate
14781  // compare down to code that is difficult to reassemble.
14782  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14783  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14784 
14785  // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
14786  // value. If so, look through the AND to get to the intrinsic.
14787  if (LHS.getOpcode() == ISD::AND &&
14788  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14789  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14790  Intrinsic::loop_decrement &&
14791  isa<ConstantSDNode>(LHS.getOperand(1)) &&
14792  !isNullConstant(LHS.getOperand(1)))
14793  LHS = LHS.getOperand(0);
14794 
14795  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14796  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14797  Intrinsic::loop_decrement &&
14798  isa<ConstantSDNode>(RHS)) {
14799  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14800  "Counter decrement comparison is not EQ or NE");
14801 
14802  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14803  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14804  (CC == ISD::SETNE && !Val);
14805 
14806  // We now need to make the intrinsic dead (it cannot be instruction
14807  // selected).
14808  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14809  assert(LHS.getNode()->hasOneUse() &&
14810  "Counter decrement has more than one use");
14811 
14812  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14813  N->getOperand(0), N->getOperand(4));
14814  }
14815 
14816  int CompareOpc;
14817  bool isDot;
14818 
14819  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14820  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14821  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14822  assert(isDot && "Can't compare against a vector result!");
14823 
14824  // If this is a comparison against something other than 0/1, then we know
14825  // that the condition is never/always true.
14826  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14827  if (Val != 0 && Val != 1) {
14828  if (CC == ISD::SETEQ) // Cond never true, remove branch.
14829  return N->getOperand(0);
14830  // Always !=, turn it into an unconditional branch.
14831  return DAG.getNode(ISD::BR, dl, MVT::Other,
14832  N->getOperand(0), N->getOperand(4));
14833  }
14834 
14835  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14836 
14837  // Create the PPCISD altivec 'dot' comparison node.
14838  SDValue Ops[] = {
14839  LHS.getOperand(2), // LHS of compare
14840  LHS.getOperand(3), // RHS of compare
14841  DAG.getConstant(CompareOpc, dl, MVT::i32)
14842  };
14843  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14844  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
14845 
14846  // Unpack the result based on how the target uses it.
14847  PPC::Predicate CompOpc;
14848  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14849  default: // Can't happen, don't crash on invalid number though.
14850  case 0: // Branch on the value of the EQ bit of CR6.
14851  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14852  break;
14853  case 1: // Branch on the inverted value of the EQ bit of CR6.
14854  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14855  break;
14856  case 2: // Branch on the value of the LT bit of CR6.
14857  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14858  break;
14859  case 3: // Branch on the inverted value of the LT bit of CR6.
14860  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14861  break;
14862  }
14863 
14864  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14865  DAG.getConstant(CompOpc, dl, MVT::i32),
14866  DAG.getRegister(PPC::CR6, MVT::i32),
14867  N->getOperand(4), CompNode.getValue(1));
14868  }
14869  break;
14870  }
14871  case ISD::BUILD_VECTOR:
14872  return DAGCombineBuildVector(N, DCI);
14873  case ISD::ABS:
14874  return combineABS(N, DCI);
14875  case ISD::VSELECT:
14876  return combineVSelect(N, DCI);
14877  }
14878 
14879  return SDValue();
14880 }
14881 
14882 SDValue
14883 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
14884  SelectionDAG &DAG,
14885  SmallVectorImpl<SDNode *> &Created) const {
14886  // fold (sdiv X, pow2)
14887  EVT VT = N->getValueType(0);
14888  if (VT == MVT::i64 && !Subtarget.isPPC64())
14889  return SDValue();
14890  if ((VT != MVT::i32 && VT != MVT::i64) ||
14891  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14892  return SDValue();
14893 
14894  SDLoc DL(N);
14895  SDValue N0 = N->getOperand(0);
14896 
14897  bool IsNegPow2 = (-Divisor).isPowerOf2();
14898  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14899  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14900 
14901  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14902  Created.push_back(Op.getNode());
14903 
14904  if (IsNegPow2) {
14905  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14906  Created.push_back(Op.getNode());
14907  }
14908 
14909  return Op;
14910 }
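// Worked example (sketch): for 'sdiv i32 %x, 4', Lg2 == 2 and we emit
//   t1: i32 = PPCISD::SRA_ADDZE %x, Constant:i32<2>
// i.e. an arithmetic shift right whose carry-out is added back (srawi
// plus addze), which rounds toward zero for negative dividends. For a
// divisor of -4 the result is additionally negated:
//   t2: i32 = sub Constant:i32<0>, t1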
14911 
14912 //===----------------------------------------------------------------------===//
14913 // Inline Assembly Support
14914 //===----------------------------------------------------------------------===//
14915 
14916 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
14917  KnownBits &Known,
14918  const APInt &DemandedElts,
14919  const SelectionDAG &DAG,
14920  unsigned Depth) const {
14921  Known.resetAll();
14922  switch (Op.getOpcode()) {
14923  default: break;
14924  case PPCISD::LBRX: {
14925  // lhbrx is known to have the top bits cleared out.
14926  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14927  Known.Zero = 0xFFFF0000;
14928  break;
14929  }
14930  case ISD::INTRINSIC_WO_CHAIN: {
14931  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14932  default: break;
14933  case Intrinsic::ppc_altivec_vcmpbfp_p:
14934  case Intrinsic::ppc_altivec_vcmpeqfp_p:
14935  case Intrinsic::ppc_altivec_vcmpequb_p:
14936  case Intrinsic::ppc_altivec_vcmpequh_p:
14937  case Intrinsic::ppc_altivec_vcmpequw_p:
14938  case Intrinsic::ppc_altivec_vcmpequd_p:
14939  case Intrinsic::ppc_altivec_vcmpequq_p:
14940  case Intrinsic::ppc_altivec_vcmpgefp_p:
14941  case Intrinsic::ppc_altivec_vcmpgtfp_p:
14942  case Intrinsic::ppc_altivec_vcmpgtsb_p:
14943  case Intrinsic::ppc_altivec_vcmpgtsh_p:
14944  case Intrinsic::ppc_altivec_vcmpgtsw_p:
14945  case Intrinsic::ppc_altivec_vcmpgtsd_p:
14946  case Intrinsic::ppc_altivec_vcmpgtsq_p:
14947  case Intrinsic::ppc_altivec_vcmpgtub_p:
14948  case Intrinsic::ppc_altivec_vcmpgtuh_p:
14949  case Intrinsic::ppc_altivec_vcmpgtuw_p:
14950  case Intrinsic::ppc_altivec_vcmpgtud_p:
14951  case Intrinsic::ppc_altivec_vcmpgtuq_p:
14952  Known.Zero = ~1U; // All bits but the low one are known to be zero.
14953  break;
14954  }
14955  }
14956  }
14957 }
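// Example (illustrative): the altivec predicate intrinsics above return
// 0 or 1 in an i32, so for
//   %c = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a,
//                                              <4 x i32> %b)
// every bit except bit 0 is reported as known zero, which lets later
// combines remove masking such as 'and i32 %c, 1'.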
14958 
14959 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14960  switch (Subtarget.getCPUDirective()) {
14961  default: break;
14962  case PPC::DIR_970:
14963  case PPC::DIR_PWR4:
14964  case PPC::DIR_PWR5:
14965  case PPC::DIR_PWR5X:
14966  case PPC::DIR_PWR6:
14967  case PPC::DIR_PWR6X:
14968  case PPC::DIR_PWR7:
14969  case PPC::DIR_PWR8:
14970  case PPC::DIR_PWR9:
14971  case PPC::DIR_PWR10:
14972  case PPC::DIR_PWR_FUTURE: {
14973  if (!ML)
14974  break;
14975 
14976  if (!DisableInnermostLoopAlign32) {
14977  // If the nested loop is an innermost loop, prefer a 32-byte alignment,
14978  // so that we can decrease cache misses and branch-prediction misses.
14979  // Actual alignment of the loop will depend on the hotness check and other
14980  // logic in alignBlocks.
14981  if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14982  return Align(32);
14983  }
14984 
14985  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14986 
14987  // For small loops (between 5 and 8 instructions), align to a 32-byte
14988  // boundary so that the entire loop fits in one instruction-cache line.
14989  uint64_t LoopSize = 0;
14990  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14991  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14992  LoopSize += TII->getInstSizeInBytes(*J);
14993  if (LoopSize > 32)
14994  break;
14995  }
14996 
14997  if (LoopSize > 16 && LoopSize <= 32)
14998  return Align(32);
14999 
15000  break;
15001  }
15002  }
15003 
15004  return TargetLowering::getPrefLoopAlignment(ML);
15005 }
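// Example (hypothetical numbers): on a PWR9 target, a loop whose body
// totals 24 bytes of instructions (more than 16, at most 32) is aligned
// to 32 bytes so it sits in a single instruction-cache line, while a
// 40-byte loop falls through to the generic TargetLowering preference.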
15006 
15007 /// getConstraintType - Given a constraint, return the type of
15008 /// constraint it is for this target.
15009 PPCTargetLowering::ConstraintType
15010 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15011  if (Constraint.size() == 1) {
15012  switch (Constraint[0]) {
15013  default: break;
15014  case 'b':
15015  case 'r':
15016  case 'f':
15017  case 'd':
15018  case 'v':
15019  case 'y':
15020  return C_RegisterClass;
15021  case 'Z':
15022  // FIXME: While Z does indicate a memory constraint, it specifically
15023  // indicates an r+r address (used in conjunction with the 'y' modifier
15024  // in the replacement string). Currently, we're forcing the base
15025  // register to be r0 in the asm printer (which is interpreted as zero)
15026  // and forming the complete address in the second register. This is
15027  // suboptimal.
15028  return C_Memory;
15029  }
15030  } else if (Constraint == "wc") { // individual CR bits.
15031  return C_RegisterClass;
15032  } else if (Constraint == "wa" || Constraint == "wd" ||
15033  Constraint == "wf" || Constraint == "ws" ||
15034  Constraint == "wi" || Constraint == "ww") {
15035  return C_RegisterClass; // VSX registers.
15036  }
15037  return TargetLowering::getConstraintType(Constraint);
15038 }
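// Example (illustrative inline asm): in
//   asm("addi %0,%1,4" : "=r"(d) : "b"(a));
// both 'r' and 'b' are register-class constraints; 'b' additionally
// excludes r0, which would be read as the literal value zero in a
// base-register position. A "Z"-constrained operand is instead treated
// as memory addressed in r+r form.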
15039 
15040 /// Examine constraint type and operand type and determine a weight value.
15041 /// This object must already have been set up with the operand type
15042 /// and the current alternative constraint selected.
15043 TargetLowering::ConstraintWeight
15044 PPCTargetLowering::getSingleConstraintMatchWeight(
15045  AsmOperandInfo &info, const char *constraint) const {
15046  ConstraintWeight weight = CW_Invalid;
15047  Value *CallOperandVal = info.CallOperandVal;
15048  // If we don't have a value, we can't do a match,
15049  // but allow it at the lowest weight.
15050  if (!CallOperandVal)
15051  return CW_Default;
15052  Type *type = CallOperandVal->getType();
15053 
15054  // Look at the constraint type.
15055  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15056  return CW_Register; // an individual CR bit.
15057  else if ((StringRef(constraint) == "wa" ||
15058  StringRef(constraint) == "wd" ||
15059  StringRef(constraint) == "wf") &&
15060  type->isVectorTy())
15061  return CW_Register;
15062  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15063  return CW_Register; // just holds 64-bit integer data.
15064  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15065  return CW_Register;
15066  else if (StringRef(constraint) == "ww" && type->isFloatTy())
15067  return CW_Register;
15068 
15069  switch (*constraint) {
15070  default:
15071  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15072  break;
15073  case 'b':
15074  if (type->isIntegerTy())
15075  weight = CW_Register;
15076  break;
15077  case 'f':
15078  if (type->isFloatTy())
15079  weight = CW_Register;
15080  break;
15081  case 'd':
15082  if (type->isDoubleTy())
15083  weight = CW_Register;
15084  break;
15085  case 'v':
15086  if (type->isVectorTy())
15087  weight = CW_Register;
15088  break;
15089  case 'y':
15090  weight = CW_Register;
15091  break;
15092  case 'Z':
15093  weight = CW_Memory;
15094  break;
15095  }
15096  return weight;
15097 }
15098 
15099 std::pair<unsigned, const TargetRegisterClass *>
15100 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15101  StringRef Constraint,
15102  MVT VT) const {
15103  if (Constraint.size() == 1) {
15104  // GCC RS6000 Constraint Letters
15105  switch (Constraint[0]) {
15106  case 'b': // R1-R31
15107  if (VT == MVT::i64 && Subtarget.isPPC64())
15108  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15109  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15110  case 'r': // R0-R31
15111  if (VT == MVT::i64 && Subtarget.isPPC64())
15112  return std::make_pair(0U, &PPC::G8RCRegClass);
15113  return std::make_pair(0U, &PPC::GPRCRegClass);
15114  // 'd' and 'f' constraints are both defined to be "the floating point
15115  // registers", where one is for 32-bit and the other for 64-bit. We don't
15116  // care much about the distinction here, so just give them the same classes.
15117  case 'd':
15118  case 'f':
15119  if (Subtarget.hasSPE()) {
15120  if (VT == MVT::f32 || VT == MVT::i32)
15121  return std::make_pair(0U, &PPC::GPRCRegClass);
15122  if (VT == MVT::f64 || VT == MVT::i64)
15123  return std::make_pair(0U, &PPC::SPERCRegClass);
15124  } else {
15125  if (VT == MVT::f32 || VT == MVT::i32)
15126  return std::make_pair(0U, &PPC::F4RCRegClass);
15127  if (VT == MVT::f64 || VT == MVT::i64)
15128  return std::make_pair(0U, &PPC::F8RCRegClass);
15129  }
15130  break;
15131  case 'v':
15132  if (Subtarget.hasAltivec())
15133  return std::make_pair(0U, &PPC::VRRCRegClass);
15134  break;
15135  case 'y': // crrc
15136  return std::make_pair(0U, &PPC::CRRCRegClass);
15137  }
15138  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15139  // An individual CR bit.
15140  return std::make_pair(0U, &PPC::CRBITRCRegClass);
15141  } else if ((Constraint == "wa" || Constraint == "wd" ||
15142  Constraint == "wf" || Constraint == "wi") &&
15143  Subtarget.hasVSX()) {
15144  return std::make_pair(0U, &PPC::VSRCRegClass);
15145  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15146  if (VT == MVT::f32 && Subtarget.hasP8Vector())
15147  return std::make_pair(0U, &PPC::VSSRCRegClass);
15148  else
15149  return std::make_pair(0U, &PPC::VSFRCRegClass);
15150  } else if (Constraint == "lr") {
15151  if (VT == MVT::i64)
15152  return std::make_pair(0U, &PPC::LR8RCRegClass);
15153  else
15154  return std::make_pair(0U, &PPC::LRRCRegClass);
15155  }
15156 
15157  // Handle special cases of physical registers that are not properly handled
15158  // by the base class.
15159  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
15160  // If we name a VSX register, we can't defer to the base class because it
15161  // will not recognize the correct register (their names will be VSL{0-31}
15162  // and V{0-31} so they won't match). So we match them here.
15163  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15164  int VSNum = atoi(Constraint.data() + 3);
15165  assert(VSNum >= 0 && VSNum <= 63 &&
15166  "Attempted to access a vsr out of range");
15167  if (VSNum < 32)
15168  return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15169  return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15170  }
15171 
15172  // For float registers, we can't defer to the base class as it will match
15173  // the SPILLTOVSRRC class.
15174  if (Constraint.size() > 3 && Constraint[1] == 'f') {
15175  int RegNum = atoi(Constraint.data() + 2);
15176  if (RegNum > 31 || RegNum < 0)
15177  report_fatal_error("Invalid floating point register number");
15178  if (VT == MVT::f32 || VT == MVT::i32)
15179  return Subtarget.hasSPE()
15180  ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15181  : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15182  if (VT == MVT::f64 || VT == MVT::i64)
15183  return Subtarget.hasSPE()
15184  ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15185  : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15186  }
15187  }
15188 
15189  std::pair<unsigned, const TargetRegisterClass *> R =
15190  TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15191 
15192  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15193  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15194  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15195  // register.
15196  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15197  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15198  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15199  PPC::GPRCRegClass.contains(R.first))
15200  return std::make_pair(TRI->getMatchingSuperReg(R.first,
15201  PPC::sub_32, &PPC::G8RCRegClass),
15202  &PPC::G8RCRegClass);
15203 
15204  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15205  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15206  R.first = PPC::CR0;
15207  R.second = &PPC::CRRCRegClass;
15208  }
15209 
15210  return R;
15211 }
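// Example (illustrative): an explicit-register constraint "{vs35}" is
// resolved here to V3 in VSRCRegClass, since VSX registers 32-63 overlap
// the Altivec V registers; likewise "{f2}" with an f64 operand maps to
// F2 in F8RC. The generic base-class lookup would not recognize either
// of these names.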
15212 
15213 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15214 /// vector. If it is invalid, don't add anything to Ops.
15215 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15216  std::string &Constraint,
15217  std::vector<SDValue>&Ops,
15218  SelectionDAG &DAG) const {
15219  SDValue Result;
15220 
15221  // Only support length 1 constraints.
15222  if (Constraint.length() > 1) return;
15223 
15224  char Letter = Constraint[0];
15225  switch (Letter) {
15226  default: break;
15227  case 'I':
15228  case 'J':
15229  case 'K':
15230  case 'L':
15231  case 'M':
15232  case 'N':
15233  case 'O':
15234  case 'P': {
15235  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15236  if (!CST) return; // Must be an immediate to match.
15237  SDLoc dl(Op);
15238  int64_t Value = CST->getSExtValue();
15239  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15240  // numbers are printed as such.
15241  switch (Letter) {
15242  default: llvm_unreachable("Unknown constraint letter!");
15243  case 'I': // "I" is a signed 16-bit constant.
15244  if (isInt<16>(Value))
15245  Result = DAG.getTargetConstant(Value, dl, TCVT);
15246  break;
15247  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
15248  if (isShiftedUInt<16, 16>(Value))
15249  Result = DAG.getTargetConstant(Value, dl, TCVT);
15250  break;
15251  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
15252  if (isShiftedInt<16, 16>(Value))
15253  Result = DAG.getTargetConstant(Value, dl, TCVT);
15254  break;
15255  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
15256  if (isUInt<16>(Value))
15257  Result = DAG.getTargetConstant(Value, dl, TCVT);
15258  break;
15259  case 'M': // "M" is a constant that is greater than 31.
15260  if (Value > 31)
15261  Result = DAG.getTargetConstant(Value, dl, TCVT);
15262  break;
15263  case 'N': // "N" is a positive constant that is an exact power of two.
15264  if (Value > 0 && isPowerOf2_64(Value))
15265  Result = DAG.getTargetConstant(Value, dl, TCVT);
15266  break;
15267  case 'O': // "O" is the constant zero.
15268  if (Value == 0)
15269  Result = DAG.getTargetConstant(Value, dl, TCVT);
15270  break;
15271  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
15272  if (isInt<16>(-Value))
15273  Result = DAG.getTargetConstant(Value, dl, TCVT);
15274  break;
15275  }
15276  break;
15277  }
15278  }
15279 
15280  if (Result.getNode()) {
15281  Ops.push_back(Result);
15282  return;
15283  }
15284 
15285  // Handle standard constraint letters.
15286  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15287 }
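// Example (illustrative): for
//   asm("addi %0,%1,%2" : "=r"(d) : "b"(a), "I"(16));
// the 'I' operand passes the isInt<16>() check and is lowered to a
// 64-bit target constant, while a value such as 70000 would fail the
// check, leave Ops empty, and be diagnosed as an invalid operand.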
15288 
15289 // isLegalAddressingMode - Return true if the addressing mode represented
15290 // by AM is legal for this target, for a load/store of the specified type.
15291 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15292  const AddrMode &AM, Type *Ty,
15293  unsigned AS,
15294  Instruction *I) const {
15295  // The r+i form for vector types has been supported since Power9, as the DQ
15296  // form. We don't check the DQ-form offset requirement (offset % 16 == 0)
15297  // here, because the immediate form is preferred on PowerPC and the offset
15298  // can be adjusted to use it later, in the PPCLoopInstrFormPrep pass. Also,
15299  // LSR checks the legal addressing mode of an LSRUse using its min and max
15300  // offsets, so we should be a little aggressive and admit other offsets.
15301  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15302  return false;
15303 
15304  // PPC allows a sign-extended 16-bit immediate field.
15305  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15306  return false;
15307 
15308  // No global is ever allowed as a base.
15309  if (AM.BaseGV)
15310  return false;
15311 
15312  // PPC only supports r+r:
15313  switch (AM.Scale) {
15314  case 0: // "r+i" or just "i", depending on HasBaseReg.
15315  break;
15316  case 1:
15317  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
15318  return false;
15319  // Otherwise we have r+r or r+i.
15320  break;
15321  case 2:
15322  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
15323  return false;
15324  // Allow 2*r as r+r.
15325  break;
15326  default:
15327  // No other scales are supported.
15328  return false;
15329  }
15330 
15331  return true;
15332 }
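// ---- Editorial sketch (not part of the original source) ----
// Concrete AddrMode shapes and how the checks above treat them; the fields
// are those of TargetLowering::AddrMode, and SomeGlobal is hypothetical.
//
//   TargetLowering::AddrMode AM;
//   AM.HasBaseReg = true;  AM.BaseOffs = 8; AM.Scale = 0; // r+i   -> legal
//   AM.HasBaseReg = true;  AM.BaseOffs = 0; AM.Scale = 1; // r+r   -> legal
//   AM.HasBaseReg = false; AM.BaseOffs = 0; AM.Scale = 2; // 2*r   -> legal (as r+r)
//   AM.HasBaseReg = true;  AM.BaseOffs = 8; AM.Scale = 1; // r+r+i -> rejected
//   AM.BaseGV = SomeGlobal;                               // global base -> rejected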
15333 
15334 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15335  SelectionDAG &DAG) const {
15336  MachineFunction &MF = DAG.getMachineFunction();
15337  MachineFrameInfo &MFI = MF.getFrameInfo();
15338  MFI.setReturnAddressIsTaken(true);
15339 
15340  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15341  return SDValue();
15342 
15343  SDLoc dl(Op);
15344  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15345 
15346  // Make sure the function does not optimize away the store of the RA to
15347  // the stack.
15348  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15349  FuncInfo->setLRStoreRequired();
15350  bool isPPC64 = Subtarget.isPPC64();
15351  auto PtrVT = getPointerTy(MF.getDataLayout());
15352 
15353  if (Depth > 0) {
15354  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15355  SDValue Offset =
15356  DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15357  isPPC64 ? MVT::i64 : MVT::i32);
15358  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15359  DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15360  MachinePointerInfo());
15361  }
15362 
15363  // Just load the return address off the stack.
15364  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15365  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15366  MachinePointerInfo());
15367 }
15368 
15369 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15370  SelectionDAG &DAG) const {
15371  SDLoc dl(Op);
15372  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15373 
15374  MachineFunction &MF = DAG.getMachineFunction();
15375  MachineFrameInfo &MFI = MF.getFrameInfo();
15376  MFI.setFrameAddressIsTaken(true);
15377 
15378  EVT PtrVT = getPointerTy(MF.getDataLayout());
15379  bool isPPC64 = PtrVT == MVT::i64;
15380 
15381  // Naked functions never have a frame pointer, and so we use r1. For all
15382  // other functions, this decision must be delayed until during PEI.
15383  unsigned FrameReg;
15384  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15385  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15386  else
15387  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15388 
15389  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15390  PtrVT);
15391  while (Depth--)
15392  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15393  FrameAddr, MachinePointerInfo());
15394  return FrameAddr;
15395 }
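// ---- Editorial example (not part of the original source) ----
// These two lowerings are reached from the GCC/Clang builtins for walking
// the call stack; Depth above is the builtin's constant argument:
//
//   void *caller_ra(void) { return __builtin_return_address(1); }
//   void *my_frame(void)  { return __builtin_frame_address(0); }
//
// Depth 0 loads the saved LR slot directly; a nonzero depth first chases
// frame pointers, matching the Depth > 0 path in LowerRETURNADDR.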
15396 
15397 // FIXME? Maybe this could be a TableGen attribute on some registers and
15398 // this table could be generated automatically from RegInfo.
15399 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15400  const MachineFunction &MF) const {
15401  bool isPPC64 = Subtarget.isPPC64();
15402 
15403  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15404  if (!is64Bit && VT != LLT::scalar(32))
15405  report_fatal_error("Invalid register global variable type");
15406 
15407  Register Reg = StringSwitch<Register>(RegName)
15408  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15409  .Case("r2", isPPC64 ? Register() : PPC::R2)
15410  .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15411  .Default(Register());
15412 
15413  if (Reg)
15414  return Reg;
15415  report_fatal_error("Invalid register name global variable");
15416 }
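// ---- Editorial example (not part of the original source) ----
// getRegisterByName backs the llvm.read_register intrinsic, which Clang
// emits for named register variables. A sketch in C (r1 is the stack
// pointer in the PPC ABIs):
//
//   unsigned long read_sp(void) {
//     register unsigned long sp __asm__("r1");
//     return sp;
//   }
//
// Only r1, r13, and (32-bit only) r2 are accepted above; any other name
// reaches the report_fatal_error path.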
15417 
15418 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15419  // The 32-bit SVR4 ABI accesses everything as got-indirect.
15420  if (Subtarget.is32BitELFABI())
15421  return true;
15422 
15423  // AIX accesses everything indirectly through the TOC, which is similar to
15424  // the GOT.
15425  if (Subtarget.isAIXABI())
15426  return true;
15427 
15428  CodeModel::Model CModel = getTargetMachine().getCodeModel();
15429  // If it is the small or large code model, module locals are accessed
15430  // indirectly by loading their address from .toc/.got.
15431  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15432  return true;
15433 
15434  // JumpTable and BlockAddress are accessed as got-indirect.
15435  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15436  return true;
15437 
15438  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15439  return Subtarget.isGVIndirectSymbol(G->getGlobal());
15440 
15441  return false;
15442 }
15443 
15444 bool
15445 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15446  // The PowerPC target isn't yet aware of offsets.
15447  return false;
15448 }
15449 
15450 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15451  const CallInst &I,
15452  MachineFunction &MF,
15453  unsigned Intrinsic) const {
15454  switch (Intrinsic) {
15455  case Intrinsic::ppc_altivec_lvx:
15456  case Intrinsic::ppc_altivec_lvxl:
15457  case Intrinsic::ppc_altivec_lvebx:
15458  case Intrinsic::ppc_altivec_lvehx:
15459  case Intrinsic::ppc_altivec_lvewx:
15460  case Intrinsic::ppc_vsx_lxvd2x:
15461  case Intrinsic::ppc_vsx_lxvw4x:
15462  case Intrinsic::ppc_vsx_lxvd2x_be:
15463  case Intrinsic::ppc_vsx_lxvw4x_be:
15464  case Intrinsic::ppc_vsx_lxvl:
15465  case Intrinsic::ppc_vsx_lxvll: {
15466  EVT VT;
15467  switch (Intrinsic) {
15468  case Intrinsic::ppc_altivec_lvebx:
15469  VT = MVT::i8;
15470  break;
15471  case Intrinsic::ppc_altivec_lvehx:
15472  VT = MVT::i16;
15473  break;
15474  case Intrinsic::ppc_altivec_lvewx:
15475  VT = MVT::i32;
15476  break;
15477  case Intrinsic::ppc_vsx_lxvd2x:
15478  case Intrinsic::ppc_vsx_lxvd2x_be:
15479  VT = MVT::v2f64;
15480  break;
15481  default:
15482  VT = MVT::v4i32;
15483  break;
15484  }
15485 
15486  Info.opc = ISD::INTRINSIC_W_CHAIN;
15487  Info.memVT = VT;
15488  Info.ptrVal = I.getArgOperand(0);
15489  Info.offset = -VT.getStoreSize()+1;
15490  Info.size = 2*VT.getStoreSize()-1;
15491  Info.align = Align(1);
15492  Info.flags = MachineMemOperand::MOLoad;
15493  return true;
15494  }
15495  case Intrinsic::ppc_altivec_stvx:
15496  case Intrinsic::ppc_altivec_stvxl:
15497  case Intrinsic::ppc_altivec_stvebx:
15498  case Intrinsic::ppc_altivec_stvehx:
15499  case Intrinsic::ppc_altivec_stvewx:
15500  case Intrinsic::ppc_vsx_stxvd2x:
15501  case Intrinsic::ppc_vsx_stxvw4x:
15502  case Intrinsic::ppc_vsx_stxvd2x_be:
15503  case Intrinsic::ppc_vsx_stxvw4x_be:
15504  case Intrinsic::ppc_vsx_stxvl:
15505  case Intrinsic::ppc_vsx_stxvll: {
15506  EVT VT;
15507  switch (Intrinsic) {
15508  case Intrinsic::ppc_altivec_stvebx:
15509  VT = MVT::i8;
15510  break;
15511  case Intrinsic::ppc_altivec_stvehx:
15512  VT = MVT::i16;
15513  break;
15514  case Intrinsic::ppc_altivec_stvewx:
15515  VT = MVT::i32;
15516  break;
15517  case Intrinsic::ppc_vsx_stxvd2x:
15518  case Intrinsic::ppc_vsx_stxvd2x_be:
15519  VT = MVT::v2f64;
15520  break;
15521  default:
15522  VT = MVT::v4i32;
15523  break;
15524  }
15525 
15526  Info.opc = ISD::INTRINSIC_VOID;
15527  Info.memVT = VT;
15528  Info.ptrVal = I.getArgOperand(1);
15529  Info.offset = -VT.getStoreSize()+1;
15530  Info.size = 2*VT.getStoreSize()-1;
15531  Info.align = Align(1);
15532  Info.flags = MachineMemOperand::MOStore;
15533  return true;
15534  }
15535  default:
15536  break;
15537  }
15538 
15539  return false;
15540 }
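// ---- Editorial note (not part of the original source) ----
// The offset/size pairs above are deliberately conservative: lvx/stvx
// ignore the low four address bits, so a 16-byte access may touch any byte
// of the 16-byte-aligned block containing the pointer. Worked out for
// VT = v4i32 (store size S = 16):
//
//   Info.offset = -S + 1  = -15   // earliest byte that might be touched
//   Info.size   = 2*S - 1 =  31   // window covering [-15, +15] around ptr
//
// This describes every byte the instruction could access without claiming
// any particular alignment.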
15541 
15542 /// It returns EVT::Other if the type should be determined using generic
15543 /// target-independent logic.
15544 EVT PPCTargetLowering::getOptimalMemOpType(
15545  const MemOp &Op, const AttributeList &FuncAttributes) const {
15546  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15547  // We should use Altivec/VSX loads and stores when available. For unaligned
15548  // addresses, unaligned VSX loads are only fast starting with the P8.
15549  if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15550  (Op.isAligned(Align(16)) ||
15551  ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15552  return MVT::v4i32;
15553  }
15554 
15555  if (Subtarget.isPPC64()) {
15556  return MVT::i64;
15557  }
15558 
15559  return MVT::i32;
15560 }
15561 
15562 /// Returns true if it is beneficial to convert a load of a constant
15563 /// to just the constant itself.
15564 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15565  Type *Ty) const {
15566  assert(Ty->isIntegerTy());
15567 
15568  unsigned BitSize = Ty->getPrimitiveSizeInBits();
15569  return !(BitSize == 0 || BitSize > 64);
15570 }
15571 
15572 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15573  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15574  return false;
15575  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15576  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15577  return NumBits1 == 64 && NumBits2 == 32;
15578 }
15579 
15580 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15581  if (!VT1.isInteger() || !VT2.isInteger())
15582  return false;
15583  unsigned NumBits1 = VT1.getSizeInBits();
15584  unsigned NumBits2 = VT2.getSizeInBits();
15585  return NumBits1 == 64 && NumBits2 == 32;
15586 }
15587 
15588 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15589  // Generally speaking, zexts are not free, but they are free when they can be
15590  // folded with other operations.
15591  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15592  EVT MemVT = LD->getMemoryVT();
15593  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15594  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15595  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15596  LD->getExtensionType() == ISD::ZEXTLOAD))
15597  return true;
15598  }
15599 
15600  // FIXME: Add other cases...
15601  // - 32-bit shifts with a zext to i64
15602  // - zext after ctlz, bswap, etc.
15603  // - zext after and by a constant mask
15604 
15605  return TargetLowering::isZExtFree(Val, VT2);
15606 }
15607 
15608 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15609  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15610  "invalid fpext types");
15611  // Extending to float128 is not free.
15612  if (DestVT == MVT::f128)
15613  return false;
15614  return true;
15615 }
15616 
15617 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15618  return isInt<16>(Imm) || isUInt<16>(Imm);
15619 }
15620 
15621 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15622  return isInt<16>(Imm) || isUInt<16>(Imm);
15623 }
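// ---- Editorial note (not part of the original source) ----
// The two predicates above accept exactly the values a single 16-bit
// immediate field (addi, cmpwi, cmpldi, and friends) can encode:
//
//   isInt<16>(-32768), isInt<16>(32767)  // signed 16-bit range
//   isUInt<16>(65535)                    // unsigned 16-bit range
//   // 65536 fails both checks and needs addis or a longer sequence.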
15624 
15625 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
15626  unsigned,
15627  unsigned,
15628  MachineMemOperand::Flags,
15629  bool *Fast) const {
15630  if (DisablePPCUnaligned)
15631  return false;
15632 
15633  // PowerPC supports unaligned memory access for simple non-vector types.
15634  // Although accessing unaligned addresses is not as efficient as accessing
15635  // aligned addresses, it is generally more efficient than manual expansion,
15636  // and generally only traps for software emulation when crossing page
15637  // boundaries.
15638 
15639  if (!VT.isSimple())
15640  return false;
15641 
15642  if (VT.isFloatingPoint() && !VT.isVector() &&
15643  !Subtarget.allowsUnalignedFPAccess())
15644  return false;
15645 
15646  if (VT.getSimpleVT().isVector()) {
15647  if (Subtarget.hasVSX()) {
15648  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15649  VT != MVT::v4f32 && VT != MVT::v4i32)
15650  return false;
15651  } else {
15652  return false;
15653  }
15654  }
15655 
15656  if (VT == MVT::ppcf128)
15657  return false;
15658 
15659  if (Fast)
15660  *Fast = true;
15661 
15662  return true;
15663 }
15664 
15665 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
15666  SDValue C) const {
15667  // Check integral scalar types.
15668  if (!VT.isScalarInteger())
15669  return false;
15670  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
15671  if (!ConstNode->getAPIntValue().isSignedIntN(64))
15672  return false;
15673  // This transformation will generate >= 2 operations. But the following
15674  // cases will generate <= 2 instructions during ISEL, so exclude them:
15675  // 1. If the constant multiplier fits 16 bits, it can be handled by one
15676  // HW instruction, i.e. MULLI.
15677  // 2. If the multiplier fits 16 bits after shifting out trailing zeros,
15678  // only one extra shift is needed beyond case 1, i.e. MULLI and RLDICR.
15679  int64_t Imm = ConstNode->getSExtValue();
15680  unsigned Shift = countTrailingZeros<uint64_t>(Imm);
15681  Imm >>= Shift;
15682  if (isInt<16>(Imm))
15683  return false;
15684  uint64_t UImm = static_cast<uint64_t>(Imm);
15685  if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
15686  isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
15687  return true;
15688  }
15689  return false;
15690 }
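// ---- Editorial examples (not part of the original source) ----
// How the checks above classify a few multipliers (values illustrative):
//
//   x * 10     : 10 >> 1 == 5, isInt<16>(5)     -> false (MULLI + RLDICR)
//   x * 65534  : 65534 >> 1 == 32767, isInt<16> -> false (MULLI + RLDICR)
//   x * 65537  : 65537 - 1 == 1 << 16           -> true  ((x << 16) + x)
//   x * -65535 : 1 - (-65535) == 1 << 16        -> true  (x - (x << 16))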
15691 
15692 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15693  EVT VT) const {
15694  return isFMAFasterThanFMulAndFAdd(
15695  MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15696 }
15697 
15698 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15699  Type *Ty) const {
15700  switch (Ty->getScalarType()->getTypeID()) {
15701  case Type::FloatTyID:
15702  case Type::DoubleTyID:
15703  return true;
15704  case Type::FP128TyID:
15705  return Subtarget.hasP9Vector();
15706  default:
15707  return false;
15708  }
15709 }
15710 
15711 // FIXME: add more patterns which are not profitable to hoist.
15712 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
15713  if (!I->hasOneUse())
15714  return true;
15715 
15716  Instruction *User = I->user_back();
15717  assert(User && "A single use instruction with no uses.");
15718 
15719  switch (I->getOpcode()) {
15720  case Instruction::FMul: {
15721  // Don't break FMA, PowerPC prefers FMA.
15722  if (User->getOpcode() != Instruction::FSub &&
15723  User->getOpcode() != Instruction::FAdd)
15724  return true;
15725 
15726  const TargetOptions &Options = getTargetMachine().Options;
15727  const Function *F = I->getFunction();
15728  const DataLayout &DL = F->getParent()->getDataLayout();
15729  Type *Ty = User->getOperand(0)->getType();
15730 
15731  return !(
15732  isFMAFasterThanFMulAndFAdd(*F, Ty) &&
15733  isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
15734  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15735  }
15736  case Instruction::Load: {
15737  // Don't break "store (load float*)" pattern, this pattern will be combined
15738  // to "store (load int32)" in later InstCombine pass. See function
15739  // combineLoadToOperationType. On PowerPC, loading a floating-point value
15740  // takes more cycles than loading a 32-bit integer.
15741  LoadInst *LI = cast<LoadInst>(I);
15742  // For loads that combineLoadToOperationType leaves alone, such as ordered
15743  // loads, it should be profitable to hoist them.
15744  // A swifterror load can only be of pointer-to-pointer type, so the later
15745  // type check will reject that case.
15746  if (!LI->isUnordered())
15747  return true;
15748 
15749  if (User->getOpcode() != Instruction::Store)
15750  return true;
15751 
15752  if (I->getType()->getTypeID() != Type::FloatTyID)
15753  return true;
15754 
15755  return false;
15756  }
15757  default:
15758  return true;
15759  }
15760  return true;
15761 }
15762 
15763 const MCPhysReg *
15764 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
15765  // LR is a callee-save register, but we must treat it as clobbered by any call
15766  // site. Hence we include LR in the scratch registers, which are in turn added
15767  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15768  // to CTR, which is used by any indirect call.
15769  static const MCPhysReg ScratchRegs[] = {
15770  PPC::X12, PPC::LR8, PPC::CTR8, 0
15771  };
15772 
15773  return ScratchRegs;
15774 }
15775 
15776 Register PPCTargetLowering::getExceptionPointerRegister(
15777  const Constant *PersonalityFn) const {
15778  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15779 }
15780 
15781 Register PPCTargetLowering::getExceptionSelectorRegister(
15782  const Constant *PersonalityFn) const {
15783  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15784 }
15785 
15786 bool
15787 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
15788  EVT VT , unsigned DefinedValues) const {
15789  if (VT == MVT::v2i64)
15790  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15791 
15792  if (Subtarget.hasVSX())
15793  return true;
15794 
15795  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15796 }
15797 
15798 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
15799  if (DisableILPPref || Subtarget.enableMachineScheduler())
15800  return TargetLowering::getSchedulingPreference(N);
15801 
15802  return Sched::ILP;
15803 }
15804 
15805 // Create a fast isel object.
15806 FastISel *
15807 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
15808  const TargetLibraryInfo *LibInfo) const {
15809  return PPC::createFastISel(FuncInfo, LibInfo);
15810 }
15811 
15812 // 'Inverted' means the FMA opcode after negating one multiplicand.
15813 // For example, (fma -a b c) = (fnmsub a b c)
15814 static unsigned invertFMAOpcode(unsigned Opc) {
15815  switch (Opc) {
15816  default:
15817  llvm_unreachable("Invalid FMA opcode for PowerPC!");
15818  case ISD::FMA:
15819  return PPCISD::FNMSUB;
15820  case PPCISD::FNMSUB:
15821  return ISD::FMA;
15822  }
15823 }
15824 
15825 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
15826  bool LegalOps, bool OptForSize,
15827  NegatibleCost &Cost,
15828  unsigned Depth) const {
15829  if (Depth > SelectionDAG::MaxRecursionDepth)
15830  return SDValue();
15831 
15832  unsigned Opc = Op.getOpcode();
15833  EVT VT = Op.getValueType();
15834  SDNodeFlags Flags = Op.getNode()->getFlags();
15835 
15836  switch (Opc) {
15837  case PPCISD::FNMSUB:
15838  if (!Op.hasOneUse() || !isTypeLegal(VT))
15839  break;
15840 
15841  const TargetOptions &Options = getTargetMachine().Options;
15842  SDValue N0 = Op.getOperand(0);
15843  SDValue N1 = Op.getOperand(1);
15844  SDValue N2 = Op.getOperand(2);
15845  SDLoc Loc(Op);
15846 
15847  NegatibleCost N2Cost = NegatibleCost::Expensive;
15848  SDValue NegN2 =
15849  getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
15850 
15851  if (!NegN2)
15852  return SDValue();
15853 
15854  // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
15855  // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
15856  // These transformations may change sign of zeroes. For example,
15857  // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
15858  if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
15859  // Try and choose the cheaper one to negate.
15860  NegatibleCost N0Cost = NegatibleCost::Expensive;
15861  SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
15862  N0Cost, Depth + 1);
15863 
15864  NegatibleCost N1Cost = NegatibleCost::Expensive;
15865  SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
15866  N1Cost, Depth + 1);
15867 
15868  if (NegN0 && N0Cost <= N1Cost) {
15869  Cost = std::min(N0Cost, N2Cost);
15870  return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
15871  } else if (NegN1) {
15872  Cost = std::min(N1Cost, N2Cost);
15873  return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
15874  }
15875  }
15876 
15877  // (fneg (fnmsub a b c)) => (fma a b (fneg c))
15878  if (isOperationLegal(ISD::FMA, VT)) {
15879  Cost = N2Cost;
15880  return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
15881  }
15882 
15883  break;
15884  }
15885 
15886  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
15887  Cost, Depth);
15888 }
15889 
15890 // Override to enable LOAD_STACK_GUARD lowering on Linux.
15891 bool PPCTargetLowering::useLoadStackGuardNode() const {
15892  if (!Subtarget.isTargetLinux())
15893  return TargetLowering::useLoadStackGuardNode();
15894  return true;
15895 }
15896 
15897 // Override to disable global variable loading on Linux.
15898 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
15899  if (!Subtarget.isTargetLinux())
15900  return TargetLowering::insertSSPDeclarations(M);
15901 }
15902 
15903 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
15904  bool ForCodeSize) const {
15905  if (!VT.isSimple() || !Subtarget.hasVSX())
15906  return false;
15907 
15908  switch(VT.getSimpleVT().SimpleTy) {
15909  default:
15910  // For FP types that are currently not supported by PPC backend, return
15911  // false. Examples: f16, f80.
15912  return false;
15913  case MVT::f32:
15914  case MVT::f64:
15915  if (Subtarget.hasPrefixInstrs()) {
15916  // With prefixed instructions, we can materialize anything that can be
15917  // represented with a 32-bit immediate, not just positive zero.
15918  APFloat APFloatOfImm = Imm;
15919  return convertToNonDenormSingle(APFloatOfImm);
15920  }
15921  LLVM_FALLTHROUGH;
15922  case MVT::ppcf128:
15923  return Imm.isPosZero();
15924  }
15925 }
15926 
15927 // For vector shift operation op, fold
15928 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15929 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
15930  SelectionDAG &DAG) {
15931  SDValue N0 = N->getOperand(0);
15932  SDValue N1 = N->getOperand(1);
15933  EVT VT = N0.getValueType();
15934  unsigned OpSizeInBits = VT.getScalarSizeInBits();
15935  unsigned Opcode = N->getOpcode();
15936  unsigned TargetOpcode;
15937 
15938  switch (Opcode) {
15939  default:
15940  llvm_unreachable("Unexpected shift operation");
15941  case ISD::SHL:
15942  TargetOpcode = PPCISD::SHL;
15943  break;
15944  case ISD::SRL:
15945  TargetOpcode = PPCISD::SRL;
15946  break;
15947  case ISD::SRA:
15948  TargetOpcode = PPCISD::SRA;
15949  break;
15950  }
15951 
15952  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15953  N1->getOpcode() == ISD::AND)
15954  if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15955  if (Mask->getZExtValue() == OpSizeInBits - 1)
15956  return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15957 
15958  return SDValue();
15959 }
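// ---- Editorial example (not part of the original source) ----
// The fold above relies on PPC vector shifts (vslw and friends) using only
// the low log2(numbits) bits of each shift amount. In IR terms, for v4i32:
//
//   %m = and <4 x i32> %amt, <i32 31, i32 31, i32 31, i32 31>
//   %r = shl <4 x i32> %x, %m
//
// becomes a single PPCISD::SHL of %x by %amt, because the and with
// numbits - 1 == 31 is exactly the modulo the hardware applies anyway.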
15960 
15961 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15962  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15963  return Value;
15964 
15965  SDValue N0 = N->getOperand(0);
15966  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15967  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
15968  N0.getOpcode() != ISD::SIGN_EXTEND ||
15969  N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
15970  N->getValueType(0) != MVT::i64)
15971  return SDValue();
15972 
15973  // We can't save an operation here if the value is already extended, and
15974  // the existing shift is easier to combine.
15975  SDValue ExtsSrc = N0.getOperand(0);
15976  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15977  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15978  return SDValue();
15979 
15980  SDLoc DL(N0);
15981  SDValue ShiftBy = SDValue(CN1, 0);
15982  // We want the shift amount to be i32 on the extswli, but the shift could
15983  // have an i64.
15984  if (ShiftBy.getValueType() == MVT::i64)
15985  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15986 
15987  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15988  ShiftBy);
15989 }
15990 
15991 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15992  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15993  return Value;
15994 
15995  return SDValue();
15996 }
15997 
15998 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15999  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16000  return Value;
16001 
16002  return SDValue();
16003 }
16004 
16005 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16006 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16007 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16008 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16009 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16010  const PPCSubtarget &Subtarget) {
16011  if (!Subtarget.isPPC64())
16012  return SDValue();
16013 
16014  SDValue LHS = N->getOperand(0);
16015  SDValue RHS = N->getOperand(1);
16016 
16017  auto isZextOfCompareWithConstant = [](SDValue Op) {
16018  if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16019  Op.getValueType() != MVT::i64)
16020  return false;
16021 
16022  SDValue Cmp = Op.getOperand(0);
16023  if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16024  Cmp.getOperand(0).getValueType() != MVT::i64)
16025  return false;
16026 
16027  if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16028  int64_t NegConstant = 0 - Constant->getSExtValue();
16029  // Due to the limitations of the addi instruction,
16030  // -C is required to be [-32768, 32767].
16031  return isInt<16>(NegConstant);
16032  }
16033 
16034  return false;
16035  };
16036 
16037  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16038  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16039 
16040  // If there is a pattern, canonicalize a zext operand to the RHS.
16041  if (LHSHasPattern && !RHSHasPattern)
16042  std::swap(LHS, RHS);
16043  else if (!LHSHasPattern && !RHSHasPattern)
16044  return SDValue();
16045 
16046  SDLoc DL(N);
16047  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16048  SDValue Cmp = RHS.getOperand(0);
16049  SDValue Z = Cmp.getOperand(0);
16050  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16051 
16052  assert(Constant && "Constant Should not be a null pointer.");
16053  int64_t NegConstant = 0 - Constant->getSExtValue();
16054 
16055  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16056  default: break;
16057  case ISD::SETNE: {
16058  // when C == 0
16059  // --> addze X, (addic Z, -1).carry
16060  // /
16061  // add X, (zext(setne Z, C))--
16062  // \ when -32768 <= -C <= 32767 && C != 0
16063  // --> addze X, (addic (addi Z, -C), -1).carry
16064  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16065  DAG.getConstant(NegConstant, DL, MVT::i64));
16066  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16067  SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16068  AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16069  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16070  SDValue(Addc.getNode(), 1));
16071  }
16072  case ISD::SETEQ: {
16073  // when C == 0
16074  // --> addze X, (subfic Z, 0).carry
16075  // /
16076  // add X, (zext(sete Z, C))--
16077  // \ when -32768 <= -C <= 32767 && C != 0
16078  // --> addze X, (subfic (addi Z, -C), 0).carry
16079  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16080  DAG.getConstant(NegConstant, DL, MVT::i64));
16081  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16082  SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16083  DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16084  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16085  SDValue(Subc.getNode(), 1));
16086  }
16087  }
16088 
16089  return SDValue();
16090 }
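// ---- Editorial example (not part of the original source) ----
// In C terms, the combine above targets the common idiom of adding a
// comparison result, e.g.:
//
//   uint64_t f(uint64_t x, uint64_t z) { return x + (z != 5); }
//
// which becomes addi (z - 5), addic (turn "nonzero" into a carry), and
// addze (add the carry to x), instead of materializing the boolean with a
// setcc/zext sequence.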
16091 
16092 // Transform
16093 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16094 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16095 // In this case both C1 and C2 must be known constants.
16096 // C1+C2 must fit into a 34 bit signed integer.
16097 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16098  const PPCSubtarget &Subtarget) {
16099  if (!Subtarget.isUsingPCRelativeCalls())
16100  return SDValue();
16101 
16102  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16103  // If we find that node try to cast the Global Address and the Constant.
16104  SDValue LHS = N->getOperand(0);
16105  SDValue RHS = N->getOperand(1);
16106 
16107  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16108  std::swap(LHS, RHS);
16109 
16110  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16111  return SDValue();
16112 
16113  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16114  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16115  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16116 
16117  // Check that both casts succeeded.
16118  if (!GSDN || !ConstNode)
16119  return SDValue();
16120 
16121  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16122  SDLoc DL(GSDN);
16123 
16124  // The signed int offset needs to fit in 34 bits.
16125  if (!isInt<34>(NewOffset))
16126  return SDValue();
16127 
16128  // The new global address is a copy of the old global address except
16129  // that it has the updated Offset.
16130  SDValue GA =
16131  DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16132  NewOffset, GSDN->getTargetFlags());
16133  SDValue MatPCRel =
16134  DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16135  return MatPCRel;
16136 }
16137 
16138 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16139  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16140  return Value;
16141 
16142  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16143  return Value;
16144 
16145  return SDValue();
16146 }
16147 
16148 // Detect TRUNCATE operations on bitcasts of float128 values.
16149 // What we are looking for here is the situation where we extract a subset
16150 // of bits from a 128 bit float.
16151 // This can be of two forms:
16152 // 1) BITCAST of f128 feeding TRUNCATE
16153 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16154 // This is required because we do not have a legal i128 type, and so we
16155 // want to avoid having to store the f128 and then reload part of it.
16157 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16158  DAGCombinerInfo &DCI) const {
16159  // If we are using CRBits then try that first.
16160  if (Subtarget.useCRBits()) {
16161  // Check if CRBits did anything and return that if it did.
16162  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16163  return CRTruncValue;
16164  }
16165 
16166  SDLoc dl(N);
16167  SDValue Op0 = N->getOperand(0);
16168 
16169  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16170  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16171  EVT VT = N->getValueType(0);
16172  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16173  return SDValue();
16174  SDValue Sub = Op0.getOperand(0);
16175  if (Sub.getOpcode() == ISD::SUB) {
16176  SDValue SubOp0 = Sub.getOperand(0);
16177  SDValue SubOp1 = Sub.getOperand(1);
16178  if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16179  (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16180  return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16181  SubOp1.getOperand(0),
16182  DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16183  }
16184  }
16185  }
16186 
16187  // Looking for a truncate of i128 to i64.
16188  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16189  return SDValue();
16190 
16191  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16192 
16193  // SRL feeding TRUNCATE.
16194  if (Op0.getOpcode() == ISD::SRL) {
16195  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16196  // The right shift has to be by 64 bits.
16197  if (!ConstNode || ConstNode->getZExtValue() != 64)
16198  return SDValue();
16199 
16200  // Switch the element number to extract.
16201  EltToExtract = EltToExtract ? 0 : 1;
16202  // Update Op0 past the SRL.
16203  Op0 = Op0.getOperand(0);
16204  }
16205 
16206  // BITCAST feeding a TRUNCATE possibly via SRL.
16207  if (Op0.getOpcode() == ISD::BITCAST &&
16208  Op0.getValueType() == MVT::i128 &&
16209  Op0.getOperand(0).getValueType() == MVT::f128) {
16210  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16211  return DCI.DAG.getNode(
16212  ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16213  DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16214  }
16215  return SDValue();
16216 }
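// ---- Editorial example (not part of the original source) ----
// The BITCAST/SRL pattern above arises from source like the following,
// where only half of the f128 bit image is wanted (the sketch assumes
// __float128 and unsigned __int128 support):
//
//   uint64_t high_bits(__float128 f) {
//     unsigned __int128 bits;
//     __builtin_memcpy(&bits, &f, sizeof bits);
//     return (uint64_t)(bits >> 64);   // SRL by 64 feeding TRUNCATE
//   }
//
// The combine extracts the wanted v2i64 element directly instead of
// spilling the f128 and reloading half of it.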
16217 
16218 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16219  SelectionDAG &DAG = DCI.DAG;
16220 
16221  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16222  if (!ConstOpOrElement)
16223  return SDValue();
16224 
16225  // An imul is usually smaller than the alternative sequence for a legal type.
16226  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16227  isOperationLegal(ISD::MUL, N->getValueType(0)))
16228  return SDValue();
16229 
16230  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16231  switch (this->Subtarget.getCPUDirective()) {
16232  default:
16233  // TODO: enhance the condition for subtarget before pwr8
16234  return false;
16235  case PPC::DIR_PWR8:
16236  // type mul add shl
16237  // scalar 4 1 1
16238  // vector 7 2 2
16239  return true;
16240  case PPC::DIR_PWR9:
16241  case PPC::DIR_PWR10:
16242  case PPC::DIR_PWR_FUTURE:
16243  // type mul add shl
16244  // scalar 5 2 2
16245  // vector 7 2 2
16246 
16247  // The cycle ratios of the related operations are shown in the table above.
16248  // Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl all cost 2
16249  // for both scalar and vector types, the 2-instruction patterns (add/sub +
16250  // shl, total cost 4) are always profitable; but for the 3-instruction
16251  // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl
16252  // total 6, so we should only do it for vector types.
16253  return IsAddOne && IsNeg ? VT.isVector() : true;
16254  }
16255  };
16256 
16257  EVT VT = N->getValueType(0);
16258  SDLoc DL(N);
16259 
16260  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16261  bool IsNeg = MulAmt.isNegative();
16262  APInt MulAmtAbs = MulAmt.abs();
16263 
16264  if ((MulAmtAbs - 1).isPowerOf2()) {
16265  // (mul x, 2^N + 1) => (add (shl x, N), x)
16266  // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16267 
16268  if (!IsProfitable(IsNeg, true, VT))
16269  return SDValue();
16270 
16271  SDValue Op0 = N->getOperand(0);
16272  SDValue Op1 =
16273  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16274  DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16275  SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16276 
16277  if (!IsNeg)
16278  return Res;
16279 
16280  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16281  } else if ((MulAmtAbs + 1).isPowerOf2()) {
16282  // (mul x, 2^N - 1) => (sub (shl x, N), x)
16283  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16284 
16285  if (!IsProfitable(IsNeg, false, VT))
16286  return SDValue();
16287 
16288  SDValue Op0 = N->getOperand(0);
16289  SDValue Op1 =
16290  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16291  DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16292 
16293  if (!IsNeg)
16294  return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16295  else
16296  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16297 
16298  } else {
16299  return SDValue();
16300  }
16301 }
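// ---- Editorial examples (not part of the original source) ----
// Worked instances of the rewrites above:
//
//   x * 5   // 5 - 1 == 4 is a power of 2:  (add (shl x, 2), x)
//   x * -5  // then negated:                (sub 0, (add (shl x, 2), x))
//   x * 7   // 7 + 1 == 8 is a power of 2:  (sub (shl x, 3), x)
//   x * -7  // operands swapped instead:    (sub x, (shl x, 3))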
16302 
16303 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16304 // in combiner since we need to check SD flags and other subtarget features.
16305 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16306  DAGCombinerInfo &DCI) const {
16307  SDValue N0 = N->getOperand(0);
16308  SDValue N1 = N->getOperand(1);
16309  SDValue N2 = N->getOperand(2);
16310  SDNodeFlags Flags = N->getFlags();
16311  EVT VT = N->getValueType(0);
16312  SelectionDAG &DAG = DCI.DAG;
16313  const TargetOptions &Options = getTargetMachine().Options;
16314  unsigned Opc = N->getOpcode();
16315  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16316  bool LegalOps = !DCI.isBeforeLegalizeOps();
16317  SDLoc Loc(N);
16318 
16319  if (!isOperationLegal(ISD::FMA, VT))
16320  return SDValue();
16321 
16322  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16323  // since (fnmsub a b c)=-0 while c-ab=+0.
16324  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16325  return SDValue();
16326 
16327  // (fma (fneg a) b c) => (fnmsub a b c)
16328  // (fnmsub (fneg a) b c) => (fma a b c)
16329  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16330  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16331 
16332  // (fma a (fneg b) c) => (fnmsub a b c)
16333  // (fnmsub a (fneg b) c) => (fma a b c)
16334  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16335  return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16336 
16337  return SDValue();
16338 }
16339 
16340 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16341  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
16342  if (!Subtarget.is64BitELFABI())
16343  return false;
16344 
16345  // If not a tail call then no need to proceed.
16346  if (!CI->isTailCall())
16347  return false;
16348 
16349  // If sibling calls have been disabled and tail-calls aren't guaranteed,
16350  // there is no reason to duplicate.
16351  auto &TM = getTargetMachine();
16352  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16353  return false;
16354 
16355  // Can't tail call a function called indirectly, or if it has variadic args.
16356  const Function *Callee = CI->getCalledFunction();
16357  if (!Callee || Callee->isVarArg())
16358  return false;
16359 
16360  // Make sure the callee and caller calling conventions are eligible for tco.
16361  const Function *Caller = CI->getParent()->getParent();
16362  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16363  CI->getCallingConv()))
16364  return false;
16365 
16366  // If the function is local then we have a good chance at tail-calling it
16367  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16368 }
16369 
16370 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16371  if (!Subtarget.hasVSX())
16372  return false;
16373  if (Subtarget.hasP9Vector() && VT == MVT::f128)
16374  return true;
16375  return VT == MVT::f32 || VT == MVT::f64 ||
16376  VT == MVT::v4f32 || VT == MVT::v2f64;
16377 }
16378 
16379 bool PPCTargetLowering::
16380 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16381  const Value *Mask = AndI.getOperand(1);
16382  // If the mask is suitable for andi. or andis. we should sink the and.
16383  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16384  // Can't handle constants wider than 64-bits.
16385  if (CI->getBitWidth() > 64)
16386  return false;
16387  int64_t ConstVal = CI->getZExtValue();
16388  return isUInt<16>(ConstVal) ||
16389  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16390  }
16391 
16392  // For non-constant masks, we can always use the record-form and.
16393  return true;
16394 }
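// ---- Editorial note (not part of the original source) ----
// The masks accepted above are exactly those a single record-form
// immediate AND can encode:
//
//   0x0000FFFF  -> isUInt<16>           -> andi.  (beneficial to sink)
//   0xFFFF0000  -> high halfword only   -> andis. (beneficial to sink)
//   0x00FF00FF  -> spans both halfwords -> false (not encodable in one and)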
16395 
16396 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16397 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16398 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16399 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16400 // Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
16401 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16402  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16403  assert(Subtarget.hasP9Altivec() &&
16404  "Only combine this when P9 altivec supported!");
16405  EVT VT = N->getValueType(0);
16406  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16407  return SDValue();
16408 
16409  SelectionDAG &DAG = DCI.DAG;
16410  SDLoc dl(N);
16411  if (N->getOperand(0).getOpcode() == ISD::SUB) {
16412  // Even for signed integers, the difference is known to be non-negative
16413  // (as a signed integer) when both inputs are zero-extended.
16414  unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16415  unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16416  if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16417  SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16418  (SubOpcd1 == ISD::ZERO_EXTEND ||
16419  SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16420  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16421  N->getOperand(0)->getOperand(0),
16422  N->getOperand(0)->getOperand(1),
16423  DAG.getTargetConstant(0, dl, MVT::i32));
16424  }
16425 
16426  // For type v4i32, it can be optimized with xvnegsp + vabsduw
16427  if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16428  N->getOperand(0).hasOneUse()) {
16429  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16430  N->getOperand(0)->getOperand(0),
16431  N->getOperand(0)->getOperand(1),
16432  DAG.getTargetConstant(1, dl, MVT::i32));
16433  }
16434  }
16435 
16436  return SDValue();
16437 }
16438 
16439 // For type v4i32/v8i16/v16i8, transform
16440 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16441 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16442 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16443 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16444 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16445  DAGCombinerInfo &DCI) const {
16446  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16447  assert(Subtarget.hasP9Altivec() &&
16448  "Only combine this when P9 altivec supported!");
16449 
16450  SelectionDAG &DAG = DCI.DAG;
16451  SDLoc dl(N);
16452  SDValue Cond = N->getOperand(0);
16453  SDValue TrueOpnd = N->getOperand(1);
16454  SDValue FalseOpnd = N->getOperand(2);
16455  EVT VT = N->getOperand(1).getValueType();
16456 
16457  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16458  FalseOpnd.getOpcode() != ISD::SUB)
16459  return SDValue();
16460 
16461  // ABSD is only available for types v4i32/v8i16/v16i8
16462  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16463  return SDValue();
16464 
16465  // Only combine if at least one operand has a single use, so that the
16466  // combine saves at least one dependent computation.
16466  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16467  return SDValue();
16468 
16469  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16470 
16471  // Can only handle unsigned comparison here
16472  switch (CC) {
16473  default:
16474  return SDValue();
16475  case ISD::SETUGT:
16476  case ISD::SETUGE:
16477  break;
16478  case ISD::SETULT:
16479  case ISD::SETULE:
16480  std::swap(TrueOpnd, FalseOpnd);
16481  break;
16482  }
16483 
16484  SDValue CmpOpnd1 = Cond.getOperand(0);
16485  SDValue CmpOpnd2 = Cond.getOperand(1);
16486 
16487  // SETCC CmpOpnd1 CmpOpnd2 cond
16488  // TrueOpnd = CmpOpnd1 - CmpOpnd2
16489  // FalseOpnd = CmpOpnd2 - CmpOpnd1
16490  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16491  TrueOpnd.getOperand(1) == CmpOpnd2 &&
16492  FalseOpnd.getOperand(0) == CmpOpnd2 &&
16493  FalseOpnd.getOperand(1) == CmpOpnd1) {
16494  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16495  CmpOpnd1, CmpOpnd2,
16496  DAG.getTargetConstant(0, dl, MVT::i32));
16497  }
16498 
16499  return SDValue();
16500 }
return AArch64::GPR64RegClass contains(Reg)
unsigned const MachineRegisterInfo * MRI
#define Success
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
@ OP_COPY
Function Alias Analysis Results
assume Assume Builder
Atomic ordering constants.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
uint64_t Align
uint64_t Offset
uint64_t Addr
uint32_t Index
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
loop rotate
#define F(x, y, z)
Definition: MD5.cpp:56
#define I(x, y, z)
Definition: MD5.cpp:59
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define R4(n)
#define R2(n)
#define R6(n)
#define T
Module.h This file contains the declarations for the Module class.
uint64_t CallInst * C
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static Instruction * callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG)
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > EnableSoftFP128("enable-soft-fp128", cl::desc("temp option to enable soft fp128"), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
cl::opt< bool > ANDIGlueBug
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Whole Quad Mode
static bool isSplat(ArrayRef< Value * > VL)
Shadow Stack GC Lowering
static bool Enabled
Definition: Statistic.cpp:50
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool is64Bit(const char *name)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4817
bool isDenormal() const
Definition: APFloat.h:1207
APInt bitcastToAPInt() const
Definition: APFloat.h:1133
bool isPosZero() const
Definition: APFloat.h:1217
Class for arbitrary precision integers.
Definition: APInt.h:70
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1525
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:930
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1442
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1868
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:364
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:483
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1782
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:469
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:667
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:655
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Class to represent array types.
Definition: DerivedTypes.h:359
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:275
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:851
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
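A sketch of the isConstantSplat query above, assuming a BuildVectorSDNode is already in hand; the 32-bit width limit and the helper name are illustrative only:

#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isNarrowConstantSplat(llvm::BuildVectorSDNode *BVN,
                                  bool IsBigEndian) {
  llvm::APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // True when every defined element repeats the same constant and the
  // smallest repeating element is at most 32 bits wide.
  return BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                              HasAnyUndefs, /*MinSplatBits=*/0, IsBigEndian) &&
         SplatBitSize <= 32;
}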
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment requirements.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool needsCustom() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
unsigned getValNo() const
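A sketch of the usual walk over CCValAssign results produced during argument lowering; the helper name is hypothetical:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"

static unsigned countRegisterAssignedArgs(
    llvm::ArrayRef<llvm::CCValAssign> ArgLocs) {
  unsigned NumInRegs = 0;
  for (const llvm::CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc())
      ++NumInRegs;                  // passed in VA.getLocReg()
    else if (VA.isMemLoc())
      (void)VA.getLocMemOffset();   // passed at this stack offset
  }
  return NumInRegs;
}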
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Definition: InstrTypes.h:1164
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1435
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1288
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1371
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1378
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1294
unsigned arg_size() const
Definition: InstrTypes.h:1311
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:273
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
uint64_t getZExtValue() const
int64_t getSExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string, with methods for querying it.
Definition: DataLayout.h:111
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:240
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:861
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:839
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:826
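A sketch combining the DataLayout queries above; the 16-byte bound is illustrative, not a PPC ABI rule:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

static bool isSmallAlignedLEType(const llvm::DataLayout &DL, llvm::Type *Ty) {
  // Endianness and ABI alignment both come straight from the layout string.
  return DL.isLittleEndian() && DL.getABITypeAlign(Ty).value() <= 16;
}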
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:685
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:355
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:228
arg_iterator arg_begin()
Definition: Function.h:762
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:298
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:345
size_t arg_size() const
Definition: Function.h:795
const GlobalValue * getGlobal() const
const GlobalObject * getBaseObject() const
Definition: Globals.cpp:467
StringRef getSection() const
Definition: Globals.cpp:162
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:547
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
bool hasComdat() const
Definition: GlobalValue.h:222
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2673
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:337
@ Kind_RegDefEarlyClobber
Definition: InlineAsm.h:233
static unsigned getKind(unsigned Flags)
Definition: InlineAsm.h:326
const BasicBlock * getParent() const
Definition: Instruction.h:94
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:174
bool isUnordered() const
Definition: Instructions.h:260
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
block_iterator block_end() const
Definition: LoopInfo.h:177
block_iterator block_begin() const
Definition: LoopInfo.h:176
Context object for machine code objects.
Definition: MCContext.h:68
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:22
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:381
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:197
Machine Value Type.
static mvt_range fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static mvt_range integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range fp_valuetypes()
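Simple MVT predicates built from the accessors above (sketch; the helper names are hypothetical):

#include "llvm/Support/MachineValueType.h"

static bool fitsInOneGPR64(llvm::MVT VT) {
  // Scalar integers of at most 64 bits fit a single 64-bit register.
  return VT.isScalarInteger() && VT.getFixedSizeInBits() <= 64;
}

static bool isWideVector(llvm::MVT VT) {
  return VT.isVector() && VT.getVectorNumElements() > 4;
}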
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
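A sketch of creating a fixed object for an incoming stack argument, the way argument-lowering paths use MachineFrameInfo; Size and SPOffset would come from calling-convention analysis, and the helper name is hypothetical:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include <cassert>
#include <cstdint>

static int createIncomingArgSlot(llvm::MachineFrameInfo &MFI, uint64_t Size,
                                 int64_t SPOffset) {
  // Fixed objects live at a known offset from the incoming stack pointer.
  int FI = MFI.CreateFixedObject(Size, SPOffset, /*IsImmutable=*/true);
  assert(MFI.getObjectSize(FI) == (int64_t)Size);
  return FI;
}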
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
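A sketch of the MachineInstrBuilder chaining above. Desc would normally be TII->get(SomeOpcode); everything here is a placeholder, not a real PPC encoding:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstrDesc.h"

static void emitThreeAddress(llvm::MachineBasicBlock &MBB,
                             llvm::MachineBasicBlock::iterator InsertPt,
                             const llvm::DebugLoc &DL,
                             const llvm::MCInstrDesc &Desc, llvm::Register Dst,
                             llvm::Register LHS, llvm::Register RHS) {
  llvm::BuildMI(MBB, InsertPt, DL, Desc, Dst)
      .addReg(LHS)  // first input operand
      .addReg(RHS); // second input operand
}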
Representation of each machine instruction.
Definition: MachineInstr.h:64
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
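A sketch of composing MachineMemOperand::Flags, which may be or'd together as noted above; this combination describes an invariant, dereferenceable load:

#include "llvm/CodeGen/MachineMemOperand.h"

static llvm::MachineMemOperand::Flags invariantLoadFlags() {
  return llvm::MachineMemOperand::MOLoad |
         llvm::MachineMemOperand::MOInvariant |
         llvm::MachineMemOperand::MODereferenceable;
}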
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register, or 0 if it is not live-in.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Align getAlign() const
const MachinePointerInfo & getPointerInfo() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
const SDValue & getBasePtr() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
unsigned getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
unsigned getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
PPCFunctionInfo - This class is derived from MachineFunction and contains private PowerPC target-specific information for each MachineFunction.
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
bool useLongCalls() const
Definition: PPCSubtarget.h:320
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:254
bool is32BitELFABI() const
Definition: PPCSubtarget.h:342
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:212
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:199
bool hasMMA() const
Definition: PPCSubtarget.h:275
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:376
bool hasFPCVT() const
Definition: PPCSubtarget.h:260
bool isAIXABI() const
Definition: PPCSubtarget.h:337
bool useSoftFloat() const
Definition: PPCSubtarget.h:231
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:240
bool hasAltivec() const
Definition: PPCSubtarget.h:261
bool allowsUnalignedFPAccess() const
Definition: PPCSubtarget.h:291
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:325
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
Definition: PPCSubtarget.h:266
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:370
bool hasFSQRT() const
Definition: PPCSubtarget.h:251
bool hasP9Vector() const
Definition: PPCSubtarget.h:270
bool hasFRE() const
Definition: PPCSubtarget.h:252
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:255
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:202
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:388
bool hasFPU() const
Definition: PPCSubtarget.h:264
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:244
bool hasRecipPrec() const
Definition: PPCSubtarget.h:256
bool hasSTFIWX() const
Definition: PPCSubtarget.h:257
bool isSVR4ABI() const
Definition: PPCSubtarget.h:338
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:294
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:191
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:329
bool hasEFPU2() const
Definition: PPCSubtarget.h:263
bool hasPrefixInstrs() const
Definition: PPCSubtarget.h:273
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:299
bool hasSPE() const
Definition: PPCSubtarget.h:262
bool hasLFIWAX() const
Definition: PPCSubtarget.h:258
bool isLittleEndian() const
Definition: PPCSubtarget.h:247
bool hasFCPSGN() const
Definition: PPCSubtarget.h:250
bool isTargetLinux() const
Definition: PPCSubtarget.h:335
bool hasP9Altivec() const
Definition: PPCSubtarget.h:271
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:394
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:400
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:209
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:229
bool is64BitELFABI() const
Definition: PPCSubtarget.h:341
bool hasFPRND() const
Definition: PPCSubtarget.h:259
bool isELFv2ABI() const
bool hasP8Vector() const
Definition: PPCSubtarget.h:267
bool pairedVectorMemops() const
Definition: PPCSubtarget.h:276
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:406
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
Definition: PPCSubtarget.h:253
bool isISA3_1() const
Definition: PPCSubtarget.h:319
bool hasLDBRX() const
Definition: PPCSubtarget.h:282
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:382
bool isISA3_0() const
Definition: PPCSubtarget.h:318
bool hasVSX() const
Definition: PPCSubtarget.h:265
bool hasDirectMove() const
Definition: PPCSubtarget.h:300
bool hasP8Altivec() const
Definition: PPCSubtarget.h:268
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+r] operation.
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if the target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Instruction * emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two adds is IR-canonical.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Instruction * emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const SDValue & getOperand(unsigned Num) const
ArrayRef< SDUse > ops() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
const SDNodeFlags getFlags() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool isUndef() const
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
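A sketch of matching a node shape through the SDValue accessors above; it recognizes (or x, x) that has exactly one use (the predicate name is hypothetical):

#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isSelfOrWithOneUse(llvm::SDValue V) {
  return V.getOpcode() == llvm::ISD::OR && V.getNumOperands() == 2 &&
         V.getOperand(0) == V.getOperand(1) && V.hasOneUse();
}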
static SectionKind getMetadata()
Definition: SectionKind.h:178
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:223
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:690
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:706
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:423
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:700
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:950
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:934
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:797
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:449
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:695
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0)
Test whether V has a splatted value for all the demanded elements.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:922
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:447
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:444
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:742
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:450
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:644
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:768
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:814
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:905
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVMContext * getContext() const
Definition: SelectionDAG.h:454
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:523
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:448
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
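A sketch of typical node construction with the SelectionDAG helpers above: build (add Op, 42) in Op's type, then bitcast the sum to CastVT. The constant 42 and the function name are illustrative:

#include "llvm/CodeGen/SelectionDAG.h"

static llvm::SDValue addConstThenBitcast(llvm::SelectionDAG &DAG,
                                         llvm::SDValue Op, llvm::EVT CastVT) {
  llvm::SDLoc dl(Op);                       // reuse Op's debug location
  llvm::EVT VT = Op.getValueType();
  llvm::SDValue C = DAG.getConstant(42, dl, VT);
  llvm::SDValue Sum = DAG.getNode(llvm::ISD::ADD, dl, VT, Op, C);
  return DAG.getBitcast(CastVT, Sum);
}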
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:375
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:442
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
const_iterator begin() const
Definition: SmallSet.h:223
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void clear()
Definition: SmallSet.h:218
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
const_iterator end() const
Definition: SmallSet.h:229
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:73
size_t size() const
Definition: SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:558
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
void push_back(const T &Elt)
Definition: SmallVector.h:404
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1169
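A sketch of order-preserving dedup with SmallSet plus SmallVector, a pairing used throughout lowering code; the inline size 8 is arbitrary:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

static llvm::SmallVector<int, 8> uniqued(llvm::ArrayRef<int> In) {
  llvm::SmallSet<int, 8> Seen;
  llvm::SmallVector<int, 8> Out;
  for (int V : In)
    if (Seen.insert(V).second) // second is true only for new elements
      Out.push_back(V);
  return Out;
}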
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:130
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:511
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:152
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:160
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:42
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:181
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:67
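A sketch of StringSwitch, the way register-name strings are commonly mapped; the names and numbers here are illustrative, not PPC register numbers:

#include "llvm/ADT/StringSwitch.h"

static unsigned regNameToIndex(llvm::StringRef Name) {
  return llvm::StringSwitch<unsigned>(Name)
      .Case("r1", 1)
      .Case("r2", 2)
      .Default(0); // unknown name
}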
Class to represent struct types.
Definition: DerivedTypes.h:212
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
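A sketch of the configuration hooks above as a target's TargetLowering constructor would invoke them; DemoTLI is hypothetical, not the actual PPCTargetLowering, and the choices shown are illustrative:

#include "llvm/CodeGen/TargetLowering.h"

class DemoTLI : public llvm::TargetLowering {
public:
  explicit DemoTLI(const llvm::TargetMachine &TM) : TargetLowering(TM) {
    // Route i64 multiplies through LowerOperation instead of the default.
    setOperationAction(llvm::ISD::MUL, llvm::MVT::i64, Custom);
    // Prefer instruction-level parallelism when scheduling.
    setSchedulingPreference(llvm::Sched::ILP);
  }
};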
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned EnableAIXExtendedAltivecABI
EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is specified.
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:418
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
static Type * getVoidTy(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:313
@ FloatTyID
32-bit floating point type
Definition: Type.h:59
@ DoubleTyID
64-bit floating point type
Definition: Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:62
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
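Several of the Type queries above come up constantly in this file's lowering code. A minimal sketch of how they compose, assuming only core LLVM IR headers (the typeQueries helper is hypothetical):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void typeQueries(LLVMContext &Ctx) {
  Type *F = Type::getFloatTy(Ctx);      // 32-bit IEEE float
  Type *V = FixedVectorType::get(F, 4); // <4 x float>
  bool A = F->isFloatTy();              // true
  bool B = V->isVectorTy();             // true
  Type *S = V->getScalarType();         // element type of the vector
  bool C = (S == F);                    // true: scalar type of <4 x float> is float
  (void)A; (void)B; (void)C;
}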
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:81
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
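llvm_unreachable typically documents a switch arm that the surrounding logic has already ruled out. A minimal sketch; the widthForKind helper is hypothetical:

#include "llvm/Support/ErrorHandling.h"

// Hypothetical helper: map a size-kind character to a bit width.
static int widthForKind(char Kind) {
  switch (Kind) {
  case 'b': return 8;
  case 'h': return 16;
  case 'w': return 32;
  case 'd': return 64;
  }
  llvm_unreachable("unknown width kind"); // traps in asserts builds if reached
}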
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:651
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:229
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:954
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:950
@ TargetConstantPool
Definition: ISDOpcodes.h:161
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:456
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to ...
Definition: ISDOpcodes.h:772
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:140
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:243
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:615
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:983
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1055
@ STRICT_FCEIL
Definition: ISDOpcodes.h:406
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:262
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:848
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:863
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:681
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:192
@ RETURNADDR
Definition: ISDOpcodes.h:88
@ GlobalAddress
Definition: ISDOpcodes.h:71
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:688
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:513
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:371
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:589
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:248
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:457
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:800
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:790
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:222
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1021
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:392
@ GlobalTLSAddress
Definition: ISDOpcodes.h:72
@ FrameIndex
Definition: ISDOpcodes.h:73
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:675
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:430
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:558
@ TargetExternalSymbol
Definition: ISDOpcodes.h:162
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:879
@ TargetJumpTable
Definition: ISDOpcodes.h:160
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1042
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:857
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:808
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:905
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:888
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:329
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:628
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1051
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:215
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:979
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:157
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:410
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:570
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:606
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:550
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:541
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:847
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:404
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:505
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:678
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:405
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:643
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1062
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:840
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:873
@ ConstantPool
Definition: ISDOpcodes.h:75
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:696
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:575
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:775
@ STRICT_FROUND
Definition: ISDOpcodes.h:408
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:637
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:429
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:407
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:925
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:122
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:87
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:423
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:445
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:422
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:734
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1010
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:450
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:581
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1030
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:177
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:272
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:381
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:494
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:763
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:729
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:922
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:403
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:134
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:684
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:974
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:898
@ BlockAddress
Definition: ISDOpcodes.h:77
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:664
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:59
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:470
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:320
@ AssertZext
Definition: ISDOpcodes.h:60
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:968
@ STRICT_FRINT
Definition: ISDOpcodes.h:402
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1130
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1027
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:185
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:158
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:485
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1325
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1241
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1292
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1272
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1331
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1252
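Intrinsic::getDeclaration is how codegen obtains a callable Function for an intrinsic; for overloaded intrinsics the concrete types must be passed explicitly. A minimal sketch, assuming an existing Module and IRBuilder (the emitSqrt helper is hypothetical):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

Value *emitSqrt(Module &M, IRBuilder<> &B, Value *X) {
  // llvm.sqrt is overloaded, so the element type is supplied explicitly.
  Function *Sqrt =
      Intrinsic::getDeclaration(&M, Intrinsic::sqrt, {X->getType()});
  return B.CreateCall(Sqrt, {X});
}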
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:72
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
@ VecShuffle
Definition: NVPTX.h:88
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:143
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:107
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition: PPC.h:112
@ MO_TPREL_HA
Definition: PPC.h:153
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:99
@ MO_TLS
Definition: PPC.h:162
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to the TLS Initial Exec model.
Definition: PPC.h:124
@ MO_TPREL_LO
Definition: PPC.h:152
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:149
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:138
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:133
@ MO_HA
Definition: PPC.h:150
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:103
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC - Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ GeneralDynamic
Definition: CodeGen.h:43
@ FS
Definition: X86.h:176
@ XMC_PR
Program Code.
Definition: XCOFF.h:40
@ XTY_ER
External reference.
Definition: XCOFF.h:176
CodeModel::Model getCodeModel()
constexpr double e
Definition: MathExtras.h:58
@ BCTR
Definition: ELF.h:92
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
This class represents lattice values for constants.
Definition: AllocatorList.h:23
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:458
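makeArrayRef builds a non-owning ArrayRef view over a single element, a pointer range, or a C array. A minimal sketch (the sum and demo helpers are hypothetical):

#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

int sum(ArrayRef<int> Xs) {
  int S = 0;
  for (int X : Xs)
    S += X;
  return S;
}

void demo() {
  int One = 7;
  int Many[] = {1, 2, 3};
  sum(makeArrayRef(One));  // single-element view
  sum(makeArrayRef(Many)); // view over the whole C array
}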
static bool isIndirectCall(const MachineInstr &MI)
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:409
STATISTIC(NumFunctions, "Total number of functions")
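STATISTIC declares a counter grouped under the current DEBUG_TYPE and reported when -stats is passed. A minimal sketch for a hypothetical pass:

#define DEBUG_TYPE "my-pass" // statistics are grouped under this name
#include "llvm/ADT/Statistic.h"

STATISTIC(NumWidgets, "Number of widgets processed");

static void processWidget() {
  ++NumWidgets; // incremented like a plain integer
}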
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:148
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:374
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:497
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:371
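The isInt<N>/isUInt<N> templates above answer whether a value fits in an N-bit signed or unsigned immediate field, which is how displacement ranges are checked during lowering. A few concrete cases, all checkable at compile time:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(isInt<16>(-32768), "smallest signed 16-bit value fits");
static_assert(!isInt<16>(32768), "one past the signed 16-bit maximum");
static_assert(isUInt<16>(65535), "largest unsigned 16-bit value fits");
static_assert(!isUInt<16>(65536), "needs 17 bits");
static_assert(isInt<32>(2147483647), "INT32_MAX fits");
static_assert(!isInt<32>(2147483648LL), "one past INT32_MAX does not");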
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:664
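FloatToBits reinterprets a float's IEEE-754 bit pattern as a 32-bit integer (BitsToFloat is the inverse). For example, 1.0f is 0x3F800000:

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void floatBitsDemo() { // hypothetical helper
  assert(FloatToBits(1.0f) == 0x3F800000u);  // sign 0, exponent 127, mantissa 0
  assert(FloatToBits(-2.0f) == 0xC0000000u); // sign 1, exponent 128, mantissa 0
}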
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:339
unsigned M1(unsigned Val)
Definition: VE.h:372
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
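any_of, like reverse and count above, is a range wrapper over the corresponding STL algorithm. A minimal sketch (both helpers are hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

bool hasNegative(const SmallVectorImpl<int> &Vals) {
  return any_of(Vals, [](int V) { return V < 0; });
}

int lastNonZero(const SmallVectorImpl<int> &Vals) {
  for (int V : reverse(Vals)) // llvm::reverse adapts any bidirectional range
    if (V != 0)
      return V;
  return 0;
}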
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:696
const NoneType None
Definition: None.h:23
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1341
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: MathExtras.h:157
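The power-of-two helpers above are simple bit tricks; a few worked values (the demo function is hypothetical):

#include "llvm/Support/MathExtras.h"
using namespace llvm;

void bitTricksDemo() {
  bool P64 = isPowerOf2_64(4096);        // true
  bool P32 = isPowerOf2_32(48);          // false: 48 = 16 * 3
  uint64_t F = PowerOf2Floor(40);        // 32, largest power of 2 <= 40
  unsigned TZ = countTrailingZeros(40u); // 3, since 40 = 0b101000
  (void)P64; (void)P32; (void)F; (void)TZ;
}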
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
@ Mul
Product of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1581
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:673
unsigned M0(unsigned Val)
Definition: VE.h:371
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:762
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1556
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:221
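The alignment helpers above (isAligned, alignTo, MinAlign, commonAlignment) all reason about power-of-two alignments. Typical results, as a minimal sketch (the demo function is hypothetical):

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

void alignDemo() {
  bool Ok = isAligned(Align(8), 24);   // true: 24 is a multiple of 8
  uint64_t Sz = alignTo(10, Align(8)); // 16, rounded up to the alignment
  uint64_t MA = MinAlign(16, 24);      // 8: lowest set bit of (16 | 24)
  Align CA = commonAlignment(Align(16), Align(4)); // Align(4), the smaller one
  (void)Ok; (void)Sz; (void)MA; (void)CA;
}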
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:778
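SignExtend32/SignExtend64 take the source bit width as a template parameter and replicate bit B-1 into the high bits:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(SignExtend32<16>(0xFFFF) == -1, "all-ones i16 is -1");
static_assert(SignExtend32<16>(0x7FFF) == 32767, "sign bit clear: unchanged");
static_assert(SignExtend64<32>(0x80000000ULL) == -2147483648LL,
              "i32 sign bit propagates through bits 32..63");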
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:944
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:163
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:178
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:355
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:121
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:246
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:131
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:345
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:278
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:341
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:149
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:146
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:285
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:178
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:290
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:126
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:141
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:407
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:136
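The EVT queries above compose naturally. A minimal sketch that builds a v4i32 EVT and inspects it (assumes an LLVMContext is available; the demo function is hypothetical):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void evtDemo(LLVMContext &Ctx) {
  EVT V4i32 = EVT::getVectorVT(Ctx, MVT::i32, 4);
  bool Vec = V4i32.isVector();               // true
  unsigned N = V4i32.getVectorNumElements(); // 4
  EVT Elt = V4i32.getVectorElementType();    // i32
  bool Int = V4i32.isInteger();              // true: a vector of integers
  TypeSize Bits = V4i32.getSizeInBits();     // 128 fixed bits
  (void)Vec; (void)N; (void)Elt; (void)Int; (void)Bits;
}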
bool isInConsecutiveRegs() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
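KnownBits tracks, per bit, whether it is known zero, known one, or unknown; isConstant() holds only once every bit is known. A minimal sketch with hand-set masks (the demo function is hypothetical):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

void knownBitsDemo() {
  KnownBits Known(8);
  Known.Zero = APInt(8, 0xF0); // high nibble known to be 0
  Known.One = APInt(8, 0x05);  // bits 0 and 2 known to be 1
  bool C1 = Known.isConstant(); // false: bits 1 and 3 still unknown
  Known.Zero = APInt(8, 0xFA);  // now every bit is either known 0...
  bool C2 = Known.isConstant(); // ...or known 1, so true
  uint64_t V = Known.getConstant().getZExtValue(); // 0x05
  (void)C1; (void)C2; (void)V;
}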
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const
void setNoFPExcept(bool b)
bool hasNoNaNs() const
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)